Linux-PCI Archive on lore.kernel.org
 help / color / Atom feed
* dev_pagemap related cleanups
@ 2019-06-13  9:43 Christoph Hellwig
  2019-06-13  9:43 ` [PATCH 01/22] mm: remove the unused ARCH_HAS_HMM_DEVICE Kconfig option Christoph Hellwig
                   ` (23 more replies)
  0 siblings, 24 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

Hi Dan, Jérôme and Jason,

below is a series that cleans up the dev_pagemap interface so that
it is more easily usable, which removes the need to wrap it in hmm
and thus allowing to kill a lot of code

Diffstat:

 22 files changed, 245 insertions(+), 802 deletions(-)

Git tree:

    git://git.infradead.org/users/hch/misc.git hmm-devmem-cleanup

Gitweb:

    http://git.infradead.org/users/hch/misc.git/shortlog/refs/heads/hmm-devmem-cleanup

^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 01/22] mm: remove the unused ARCH_HAS_HMM_DEVICE Kconfig option
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 18:30   ` Jason Gunthorpe
  2019-06-13  9:43 ` [PATCH 02/22] mm: remove the struct hmm_device infrastructure Christoph Hellwig
                   ` (22 subsequent siblings)
  23 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/Kconfig | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index f0c76ba47695..0d2ba7e1f43e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -675,16 +675,6 @@ config ARCH_HAS_HMM_MIRROR
 	depends on (X86_64 || PPC64)
 	depends on MMU && 64BIT
 
-config ARCH_HAS_HMM_DEVICE
-	bool
-	default y
-	depends on (X86_64 || PPC64)
-	depends on MEMORY_HOTPLUG
-	depends on MEMORY_HOTREMOVE
-	depends on SPARSEMEM_VMEMMAP
-	depends on ARCH_HAS_ZONE_DEVICE
-	select XARRAY_MULTI
-
 config ARCH_HAS_HMM
 	bool
 	default y
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 02/22] mm: remove the struct hmm_device infrastructure
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
  2019-06-13  9:43 ` [PATCH 01/22] mm: remove the unused ARCH_HAS_HMM_DEVICE Kconfig option Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 18:46   ` Jason Gunthorpe
  2019-06-13 23:06   ` [Nouveau] " John Hubbard
  2019-06-13  9:43 ` [PATCH 03/22] mm: remove hmm_devmem_add_resource Christoph Hellwig
                   ` (21 subsequent siblings)
  23 siblings, 2 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

This code is a trivial wrapper around device model helpers, which
should have been integrated into the driver device model usage from
the start.  Assuming it actually had users, which it never had since
the code was added more than 1 1/2 years ago.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/hmm.h | 20 ------------
 mm/hmm.c            | 80 ---------------------------------------------
 2 files changed, 100 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 0fa8ea34ccef..4867b9da1b6c 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -717,26 +717,6 @@ static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page)
 {
 	return page->hmm_data;
 }
-
-
-/*
- * struct hmm_device - fake device to hang device memory onto
- *
- * @device: device struct
- * @minor: device minor number
- */
-struct hmm_device {
-	struct device		device;
-	unsigned int		minor;
-};
-
-/*
- * A device driver that wants to handle multiple devices memory through a
- * single fake device can use hmm_device to do so. This is purely a helper and
- * it is not strictly needed, in order to make use of any HMM functionality.
- */
-struct hmm_device *hmm_device_new(void *drvdata);
-void hmm_device_put(struct hmm_device *hmm_device);
 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 #else /* IS_ENABLED(CONFIG_HMM) */
 static inline void hmm_mm_destroy(struct mm_struct *mm) {}
diff --git a/mm/hmm.c b/mm/hmm.c
index 886b18695b97..ff2598eb7377 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1499,84 +1499,4 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
 	return devmem;
 }
 EXPORT_SYMBOL_GPL(hmm_devmem_add_resource);
-
-/*
- * A device driver that wants to handle multiple devices memory through a
- * single fake device can use hmm_device to do so. This is purely a helper
- * and it is not needed to make use of any HMM functionality.
- */
-#define HMM_DEVICE_MAX 256
-
-static DECLARE_BITMAP(hmm_device_mask, HMM_DEVICE_MAX);
-static DEFINE_SPINLOCK(hmm_device_lock);
-static struct class *hmm_device_class;
-static dev_t hmm_device_devt;
-
-static void hmm_device_release(struct device *device)
-{
-	struct hmm_device *hmm_device;
-
-	hmm_device = container_of(device, struct hmm_device, device);
-	spin_lock(&hmm_device_lock);
-	clear_bit(hmm_device->minor, hmm_device_mask);
-	spin_unlock(&hmm_device_lock);
-
-	kfree(hmm_device);
-}
-
-struct hmm_device *hmm_device_new(void *drvdata)
-{
-	struct hmm_device *hmm_device;
-
-	hmm_device = kzalloc(sizeof(*hmm_device), GFP_KERNEL);
-	if (!hmm_device)
-		return ERR_PTR(-ENOMEM);
-
-	spin_lock(&hmm_device_lock);
-	hmm_device->minor = find_first_zero_bit(hmm_device_mask, HMM_DEVICE_MAX);
-	if (hmm_device->minor >= HMM_DEVICE_MAX) {
-		spin_unlock(&hmm_device_lock);
-		kfree(hmm_device);
-		return ERR_PTR(-EBUSY);
-	}
-	set_bit(hmm_device->minor, hmm_device_mask);
-	spin_unlock(&hmm_device_lock);
-
-	dev_set_name(&hmm_device->device, "hmm_device%d", hmm_device->minor);
-	hmm_device->device.devt = MKDEV(MAJOR(hmm_device_devt),
-					hmm_device->minor);
-	hmm_device->device.release = hmm_device_release;
-	dev_set_drvdata(&hmm_device->device, drvdata);
-	hmm_device->device.class = hmm_device_class;
-	device_initialize(&hmm_device->device);
-
-	return hmm_device;
-}
-EXPORT_SYMBOL(hmm_device_new);
-
-void hmm_device_put(struct hmm_device *hmm_device)
-{
-	put_device(&hmm_device->device);
-}
-EXPORT_SYMBOL(hmm_device_put);
-
-static int __init hmm_init(void)
-{
-	int ret;
-
-	ret = alloc_chrdev_region(&hmm_device_devt, 0,
-				  HMM_DEVICE_MAX,
-				  "hmm_device");
-	if (ret)
-		return ret;
-
-	hmm_device_class = class_create(THIS_MODULE, "hmm_device");
-	if (IS_ERR(hmm_device_class)) {
-		unregister_chrdev_region(hmm_device_devt, HMM_DEVICE_MAX);
-		return PTR_ERR(hmm_device_class);
-	}
-	return 0;
-}
-
-device_initcall(hmm_init);
 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 03/22] mm: remove hmm_devmem_add_resource
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
  2019-06-13  9:43 ` [PATCH 01/22] mm: remove the unused ARCH_HAS_HMM_DEVICE Kconfig option Christoph Hellwig
  2019-06-13  9:43 ` [PATCH 02/22] mm: remove the struct hmm_device infrastructure Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 18:52   ` Jason Gunthorpe
                     ` (2 more replies)
  2019-06-13  9:43 ` [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free Christoph Hellwig
                   ` (20 subsequent siblings)
  23 siblings, 3 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

This function has never been used since it was first added to the kernel
more than a year and a half ago, and if we ever grow a consumer of the
MEMORY_DEVICE_PUBLIC infrastructure it can easily use devm_memremap_pages
directly now that we've simplified the API for it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/hmm.h |  3 ---
 mm/hmm.c            | 54 ---------------------------------------------
 2 files changed, 57 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 4867b9da1b6c..5761a39221a6 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -688,9 +688,6 @@ struct hmm_devmem {
 struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 				  struct device *device,
 				  unsigned long size);
-struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
-					   struct device *device,
-					   struct resource *res);
 
 /*
  * hmm_devmem_page_set_drvdata - set per-page driver data field
diff --git a/mm/hmm.c b/mm/hmm.c
index ff2598eb7377..0c62426d1257 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1445,58 +1445,4 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	return devmem;
 }
 EXPORT_SYMBOL_GPL(hmm_devmem_add);
-
-struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
-					   struct device *device,
-					   struct resource *res)
-{
-	struct hmm_devmem *devmem;
-	void *result;
-	int ret;
-
-	if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
-		return ERR_PTR(-EINVAL);
-
-	dev_pagemap_get_ops();
-
-	devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL);
-	if (!devmem)
-		return ERR_PTR(-ENOMEM);
-
-	init_completion(&devmem->completion);
-	devmem->pfn_first = -1UL;
-	devmem->pfn_last = -1UL;
-	devmem->resource = res;
-	devmem->device = device;
-	devmem->ops = ops;
-
-	ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release,
-			      0, GFP_KERNEL);
-	if (ret)
-		return ERR_PTR(ret);
-
-	ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit,
-			&devmem->ref);
-	if (ret)
-		return ERR_PTR(ret);
-
-	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
-	devmem->pfn_last = devmem->pfn_first +
-			   (resource_size(devmem->resource) >> PAGE_SHIFT);
-	devmem->page_fault = hmm_devmem_fault;
-
-	devmem->pagemap.type = MEMORY_DEVICE_PUBLIC;
-	devmem->pagemap.res = *devmem->resource;
-	devmem->pagemap.page_free = hmm_devmem_free;
-	devmem->pagemap.altmap_valid = false;
-	devmem->pagemap.ref = &devmem->ref;
-	devmem->pagemap.data = devmem;
-	devmem->pagemap.kill = hmm_devmem_ref_kill;
-
-	result = devm_memremap_pages(devmem->device, &devmem->pagemap);
-	if (IS_ERR(result))
-		return result;
-	return devmem;
-}
-EXPORT_SYMBOL_GPL(hmm_devmem_add_resource);
 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (2 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 03/22] mm: remove hmm_devmem_add_resource Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:05   ` Jason Gunthorpe
                     ` (2 more replies)
  2019-06-13  9:43 ` [PATCH 05/22] mm: export alloc_pages_vma Christoph Hellwig
                   ` (19 subsequent siblings)
  23 siblings, 3 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

->mapping isn't even used by HMM users, and the field at the same offset
in the zone_device part of the union is declared as pad.  (Which btw is
rather confusing, as DAX uses ->pgmap and ->mapping from two different
sides of the union, but DAX doesn't use hmm_devmem_free).

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/hmm.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 0c62426d1257..e1dc98407e7b 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1347,8 +1347,6 @@ static void hmm_devmem_free(struct page *page, void *data)
 {
 	struct hmm_devmem *devmem = data;
 
-	page->mapping = NULL;
-
 	devmem->ops->free(devmem, page);
 }
 
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (3 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-14  1:47   ` John Hubbard
  2019-06-20 19:17   ` Michal Hocko
  2019-06-13  9:43 ` [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper Christoph Hellwig
                   ` (18 subsequent siblings)
  23 siblings, 2 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

noveau is currently using this through an odd hmm wrapper, and I plan
to switch it to the real thing later in this series.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/mempolicy.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 01600d80ae01..f9023b5fba37 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2098,6 +2098,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 out:
 	return page;
 }
+EXPORT_SYMBOL_GPL(alloc_pages_vma);
 
 /**
  * 	alloc_pages_current - Allocate pages.
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (4 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 05/22] mm: export alloc_pages_vma Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:16   ` Jason Gunthorpe
  2019-06-15  2:21   ` John Hubbard
  2019-06-13  9:43 ` [PATCH 07/22] memremap: move dev_pagemap callbacks into a separate structure Christoph Hellwig
                   ` (17 subsequent siblings)
  23 siblings, 2 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

Keep the physical address allocation that hmm_add_device does with the
rest of the resource code, and allow future reuse of it without the hmm
wrapper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/ioport.h |  2 ++
 kernel/resource.c      | 39 +++++++++++++++++++++++++++++++++++++++
 mm/hmm.c               | 33 ++++-----------------------------
 3 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index da0ebaec25f0..76a33ae3bf6c 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -286,6 +286,8 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
        return (r1->start <= r2->end && r1->end >= r2->start);
 }
 
+struct resource *devm_request_free_mem_region(struct device *dev,
+		struct resource *base, unsigned long size);
 
 #endif /* __ASSEMBLY__ */
 #endif	/* _LINUX_IOPORT_H */
diff --git a/kernel/resource.c b/kernel/resource.c
index 158f04ec1d4f..99c58134ed1c 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1628,6 +1628,45 @@ void resource_list_free(struct list_head *head)
 }
 EXPORT_SYMBOL(resource_list_free);
 
+#ifdef CONFIG_DEVICE_PRIVATE
+/**
+ * devm_request_free_mem_region - find free region for device private memory
+ *
+ * @dev: device struct to bind the resource too
+ * @size: size in bytes of the device memory to add
+ * @base: resource tree to look in
+ *
+ * This function tries to find an empty range of physical address big enough to
+ * contain the new resource, so that it can later be hotpluged as ZONE_DEVICE
+ * memory, which in turn allocates struct pages.
+ */
+struct resource *devm_request_free_mem_region(struct device *dev,
+		struct resource *base, unsigned long size)
+{
+	resource_size_t end, addr;
+	struct resource *res;
+
+	size = ALIGN(size, 1UL << PA_SECTION_SHIFT);
+	end = min_t(unsigned long, base->end, (1UL << MAX_PHYSMEM_BITS) - 1);
+	addr = end - size + 1UL;
+
+	for (; addr > size && addr >= base->start; addr -= size) {
+		if (region_intersects(addr, size, 0, IORES_DESC_NONE) !=
+				REGION_DISJOINT)
+			continue;
+
+		res = devm_request_mem_region(dev, addr, size, dev_name(dev));
+		if (!res)
+			return ERR_PTR(-ENOMEM);
+		res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
+		return res;
+	}
+
+	return ERR_PTR(-ERANGE);
+}
+EXPORT_SYMBOL_GPL(devm_request_free_mem_region);
+#endif /* CONFIG_DEVICE_PRIVATE */
+
 static int __init strict_iomem(char *str)
 {
 	if (strstr(str, "relaxed"))
diff --git a/mm/hmm.c b/mm/hmm.c
index e1dc98407e7b..13a16faf0a77 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -26,8 +26,6 @@
 #include <linux/mmu_notifier.h>
 #include <linux/memory_hotplug.h>
 
-#define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
-
 #if IS_ENABLED(CONFIG_HMM_MIRROR)
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
 
@@ -1372,7 +1370,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 				  unsigned long size)
 {
 	struct hmm_devmem *devmem;
-	resource_size_t addr;
 	void *result;
 	int ret;
 
@@ -1398,32 +1395,10 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	if (ret)
 		return ERR_PTR(ret);
 
-	size = ALIGN(size, PA_SECTION_SIZE);
-	addr = min((unsigned long)iomem_resource.end,
-		   (1UL << MAX_PHYSMEM_BITS) - 1);
-	addr = addr - size + 1UL;
-
-	/*
-	 * FIXME add a new helper to quickly walk resource tree and find free
-	 * range
-	 *
-	 * FIXME what about ioport_resource resource ?
-	 */
-	for (; addr > size && addr >= iomem_resource.start; addr -= size) {
-		ret = region_intersects(addr, size, 0, IORES_DESC_NONE);
-		if (ret != REGION_DISJOINT)
-			continue;
-
-		devmem->resource = devm_request_mem_region(device, addr, size,
-							   dev_name(device));
-		if (!devmem->resource)
-			return ERR_PTR(-ENOMEM);
-		break;
-	}
-	if (!devmem->resource)
-		return ERR_PTR(-ERANGE);
-
-	devmem->resource->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
+	devmem->resource = devm_request_free_mem_region(device, &iomem_resource,
+			size);
+	if (IS_ERR(devmem->resource))
+		return ERR_CAST(devmem->resource);
 	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
 	devmem->pfn_last = devmem->pfn_first +
 			   (resource_size(devmem->resource) >> PAGE_SHIFT);
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 07/22] memremap: move dev_pagemap callbacks into a separate structure
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (5 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:18   ` Jason Gunthorpe
  2019-06-13 20:14   ` Logan Gunthorpe
  2019-06-13  9:43 ` [PATCH 08/22] memremap: pass a struct dev_pagemap to ->kill Christoph Hellwig
                   ` (16 subsequent siblings)
  23 siblings, 2 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

The dev_pagemap is a growing too many callbacks.  Move them into a
separate ops structure so that they are not duplicated for multiple
instances, and an attacker can't easily overwrite them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/dax/device.c              |  6 +++++-
 drivers/nvdimm/pmem.c             | 18 +++++++++++++-----
 drivers/pci/p2pdma.c              |  5 ++++-
 include/linux/memremap.h          | 29 +++++++++++++++--------------
 kernel/memremap.c                 | 12 ++++++------
 mm/hmm.c                          |  8 ++++++--
 tools/testing/nvdimm/test/iomap.c |  2 +-
 7 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 996d68ff992a..4adab774dade 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -443,6 +443,10 @@ static void dev_dax_kill(void *dev_dax)
 	kill_dev_dax(dev_dax);
 }
 
+static const struct dev_pagemap_ops dev_dax_pagemap_ops = {
+	.kill		= dev_dax_percpu_kill,
+};
+
 int dev_dax_probe(struct device *dev)
 {
 	struct dev_dax *dev_dax = to_dev_dax(dev);
@@ -471,7 +475,7 @@ int dev_dax_probe(struct device *dev)
 		return rc;
 
 	dev_dax->pgmap.ref = &dev_dax->ref;
-	dev_dax->pgmap.kill = dev_dax_percpu_kill;
+	dev_dax->pgmap.ops = &dev_dax_pagemap_ops;
 	addr = devm_memremap_pages(dev, &dev_dax->pgmap);
 	if (IS_ERR(addr)) {
 		devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index d9d845077b8b..4efbf184ea68 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -316,7 +316,7 @@ static void pmem_release_queue(void *q)
 	blk_cleanup_queue(q);
 }
 
-static void pmem_freeze_queue(struct percpu_ref *ref)
+static void pmem_kill(struct percpu_ref *ref)
 {
 	struct request_queue *q;
 
@@ -339,19 +339,27 @@ static void pmem_release_pgmap_ops(void *__pgmap)
 	dev_pagemap_put_ops();
 }
 
-static void fsdax_pagefree(struct page *page, void *data)
+static void pmem_fsdax_page_free(struct page *page, void *data)
 {
 	wake_up_var(&page->_refcount);
 }
 
+static const struct dev_pagemap_ops fsdax_pagemap_ops = {
+	.page_free		= pmem_fsdax_page_free,
+	.kill			= pmem_kill,
+};
+
+static const struct dev_pagemap_ops pmem_legacy_pagemap_ops = {
+	.kill			= pmem_kill,
+};
+
 static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
 {
 	dev_pagemap_get_ops();
 	if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
 		return -ENOMEM;
 	pgmap->type = MEMORY_DEVICE_FS_DAX;
-	pgmap->page_free = fsdax_pagefree;
-
+	pgmap->ops = &fsdax_pagemap_ops;
 	return 0;
 }
 
@@ -412,7 +420,6 @@ static int pmem_attach_disk(struct device *dev,
 
 	pmem->pfn_flags = PFN_DEV;
 	pmem->pgmap.ref = &q->q_usage_counter;
-	pmem->pgmap.kill = pmem_freeze_queue;
 	if (is_nd_pfn(dev)) {
 		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
 			return -ENOMEM;
@@ -433,6 +440,7 @@ static int pmem_attach_disk(struct device *dev,
 		pmem->pfn_flags |= PFN_MAP;
 		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
 	} else {
+		pmem->pgmap.ops = &pmem_legacy_pagemap_ops;
 		addr = devm_memremap(dev, pmem->phys_addr,
 				pmem->size, ARCH_MEMREMAP_PMEM);
 		memcpy(&bb_res, &nsio->res, sizeof(bb_res));
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 742928d0053e..6e76380f5b97 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -150,6 +150,10 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
 	return error;
 }
 
+static const struct dev_pagemap_ops pci_p2pdma_pagemap_ops = {
+	.kill		= pci_p2pdma_percpu_kill,
+};
+
 /**
  * pci_p2pdma_add_resource - add memory for use as p2p memory
  * @pdev: the device to add the memory to
@@ -196,7 +200,6 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
 	pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
 		pci_resource_start(pdev, bar);
-	pgmap->kill = pci_p2pdma_percpu_kill;
 
 	addr = devm_memremap_pages(&pdev->dev, pgmap);
 	if (IS_ERR(addr)) {
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f0628660d541..5f7f40875b35 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -63,39 +63,40 @@ enum memory_type {
 	MEMORY_DEVICE_PCI_P2PDMA,
 };
 
-/*
- * Additional notes about MEMORY_DEVICE_PRIVATE may be found in
- * include/linux/hmm.h and Documentation/vm/hmm.rst. There is also a brief
- * explanation in include/linux/memory_hotplug.h.
- *
- * The page_free() callback is called once the page refcount reaches 1
- * (ZONE_DEVICE pages never reach 0 refcount unless there is a refcount bug.
- * This allows the device driver to implement its own memory management.)
- */
-typedef void (*dev_page_free_t)(struct page *page, void *data);
+struct dev_pagemap_ops {
+	/*
+	 * Called once the page refcount reaches 1.  (ZONE_DEVICE pages never
+	 * reach 0 refcount unless there is a refcount bug. This allows the
+	 * device driver to implement its own memory management.)
+	 */
+	void (*page_free)(struct page *page, void *data);
+
+	/*
+	 * Transition the percpu_ref in struct dev_pagemap to the dead state.
+	 */
+	void (*kill)(struct percpu_ref *ref);
+};
 
 /**
  * struct dev_pagemap - metadata for ZONE_DEVICE mappings
- * @page_free: free page callback when page refcount reaches 1
  * @altmap: pre-allocated/reserved memory for vmemmap allocations
  * @res: physical address range covered by @ref
  * @ref: reference count that pins the devm_memremap_pages() mapping
- * @kill: callback to transition @ref to the dead state
  * @dev: host device of the mapping for debug
  * @data: private data pointer for page_free()
  * @type: memory type: see MEMORY_* in memory_hotplug.h
+ * @ops: method table
  */
 struct dev_pagemap {
-	dev_page_free_t page_free;
 	struct vmem_altmap altmap;
 	bool altmap_valid;
 	struct resource res;
 	struct percpu_ref *ref;
-	void (*kill)(struct percpu_ref *ref);
 	struct device *dev;
 	void *data;
 	enum memory_type type;
 	u64 pci_p2pdma_bus_offset;
+	const struct dev_pagemap_ops *ops;
 };
 
 #ifdef CONFIG_ZONE_DEVICE
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 1490e63f69a9..e23286ce0ec4 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -92,7 +92,7 @@ static void devm_memremap_pages_release(void *data)
 	unsigned long pfn;
 	int nid;
 
-	pgmap->kill(pgmap->ref);
+	pgmap->ops->kill(pgmap->ref);
 	for_each_device_pfn(pfn, pgmap)
 		put_page(pfn_to_page(pfn));
 
@@ -127,8 +127,8 @@ static void devm_memremap_pages_release(void *data)
  * @pgmap: pointer to a struct dev_pagemap
  *
  * Notes:
- * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
- *    by the caller before passing it to this function
+ * 1/ At a minimum the res, ref and type and ops members of @pgmap must be
+ *    initialized by the caller before passing it to this function
  *
  * 2/ The altmap field may optionally be initialized, in which case altmap_valid
  *    must be set to true
@@ -156,7 +156,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	pgprot_t pgprot = PAGE_KERNEL;
 	int error, nid, is_ram;
 
-	if (!pgmap->ref || !pgmap->kill)
+	if (!pgmap->ref || !pgmap->ops || !pgmap->ops->kill)
 		return ERR_PTR(-EINVAL);
 
 	align_start = res->start & ~(SECTION_SIZE - 1);
@@ -266,7 +266,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
  err_pfn_remap:
 	pgmap_array_delete(res);
  err_array:
-	pgmap->kill(pgmap->ref);
+	pgmap->ops->kill(pgmap->ref);
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(devm_memremap_pages);
@@ -353,7 +353,7 @@ void __put_devmap_managed_page(struct page *page)
 
 		mem_cgroup_uncharge(page);
 
-		page->pgmap->page_free(page, page->pgmap->data);
+		page->pgmap->ops->page_free(page, page->pgmap->data);
 	} else if (!count)
 		__put_page(page);
 }
diff --git a/mm/hmm.c b/mm/hmm.c
index 13a16faf0a77..2501ac6045d0 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1348,6 +1348,11 @@ static void hmm_devmem_free(struct page *page, void *data)
 	devmem->ops->free(devmem, page);
 }
 
+static const struct dev_pagemap_ops hmm_pagemap_ops = {
+	.page_free		= hmm_devmem_free,
+	.kill			= hmm_devmem_ref_kill,
+};
+
 /*
  * hmm_devmem_add() - hotplug ZONE_DEVICE memory for device memory
  *
@@ -1406,11 +1411,10 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 
 	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
 	devmem->pagemap.res = *devmem->resource;
-	devmem->pagemap.page_free = hmm_devmem_free;
+	devmem->pagemap.ops = &hmm_pagemap_ops;
 	devmem->pagemap.altmap_valid = false;
 	devmem->pagemap.ref = &devmem->ref;
 	devmem->pagemap.data = devmem;
-	devmem->pagemap.kill = hmm_devmem_ref_kill;
 
 	result = devm_memremap_pages(devmem->device, &devmem->pagemap);
 	if (IS_ERR(result))
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index c6635fee27d8..7f5ece9b5011 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -108,7 +108,7 @@ static void nfit_test_kill(void *_pgmap)
 {
 	struct dev_pagemap *pgmap = _pgmap;
 
-	pgmap->kill(pgmap->ref);
+	pgmap->ops->kill(pgmap->ref);
 }
 
 void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 08/22] memremap: pass a struct dev_pagemap to ->kill
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (6 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 07/22] memremap: move dev_pagemap callbacks into a separate structure Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:26   ` Jason Gunthorpe
  2019-06-13 20:12   ` Logan Gunthorpe
  2019-06-13  9:43 ` [PATCH 09/22] memremap: lift the devmap_enable manipulation into devm_memremap_pages Christoph Hellwig
                   ` (15 subsequent siblings)
  23 siblings, 2 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

Passing the actual typed structure leads to more understandable code
vs the actual references.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/dax/device.c              | 7 +++----
 drivers/nvdimm/pmem.c             | 6 +++---
 drivers/pci/p2pdma.c              | 6 +++---
 include/linux/memremap.h          | 2 +-
 kernel/memremap.c                 | 4 ++--
 mm/hmm.c                          | 4 ++--
 tools/testing/nvdimm/test/iomap.c | 6 ++----
 7 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 4adab774dade..e23fa1bd8c97 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -37,13 +37,12 @@ static void dev_dax_percpu_exit(void *data)
 	percpu_ref_exit(ref);
 }
 
-static void dev_dax_percpu_kill(struct percpu_ref *data)
+static void dev_dax_percpu_kill(struct dev_pagemap *pgmap)
 {
-	struct percpu_ref *ref = data;
-	struct dev_dax *dev_dax = ref_to_dev_dax(ref);
+	struct dev_dax *dev_dax = container_of(pgmap, struct dev_dax, pgmap);
 
 	dev_dbg(&dev_dax->dev, "%s\n", __func__);
-	percpu_ref_kill(ref);
+	percpu_ref_kill(pgmap->ref);
 }
 
 static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4efbf184ea68..b9638c6553a1 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -316,11 +316,11 @@ static void pmem_release_queue(void *q)
 	blk_cleanup_queue(q);
 }
 
-static void pmem_kill(struct percpu_ref *ref)
+static void pmem_kill(struct dev_pagemap *pgmap)
 {
-	struct request_queue *q;
+	struct request_queue *q =
+		container_of(pgmap->ref, struct request_queue, q_usage_counter);
 
-	q = container_of(ref, typeof(*q), q_usage_counter);
 	blk_freeze_queue_start(q);
 }
 
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 6e76380f5b97..3bcacc9222c6 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -82,7 +82,7 @@ static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
 	complete_all(&p2p->devmap_ref_done);
 }
 
-static void pci_p2pdma_percpu_kill(struct percpu_ref *ref)
+static void pci_p2pdma_percpu_kill(struct dev_pagemap *pgmap)
 {
 	/*
 	 * pci_p2pdma_add_resource() may be called multiple times
@@ -90,10 +90,10 @@ static void pci_p2pdma_percpu_kill(struct percpu_ref *ref)
 	 * times. We only want the first action to actually kill the
 	 * percpu_ref.
 	 */
-	if (percpu_ref_is_dying(ref))
+	if (percpu_ref_is_dying(pgmap->ref))
 		return;
 
-	percpu_ref_kill(ref);
+	percpu_ref_kill(pgmap->ref);
 }
 
 static void pci_p2pdma_release(void *data)
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 5f7f40875b35..96a3a6d564ad 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -74,7 +74,7 @@ struct dev_pagemap_ops {
 	/*
 	 * Transition the percpu_ref in struct dev_pagemap to the dead state.
 	 */
-	void (*kill)(struct percpu_ref *ref);
+	void (*kill)(struct dev_pagemap *pgmap);
 };
 
 /**
diff --git a/kernel/memremap.c b/kernel/memremap.c
index e23286ce0ec4..94b830b6eca5 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -92,7 +92,7 @@ static void devm_memremap_pages_release(void *data)
 	unsigned long pfn;
 	int nid;
 
-	pgmap->ops->kill(pgmap->ref);
+	pgmap->ops->kill(pgmap);
 	for_each_device_pfn(pfn, pgmap)
 		put_page(pfn_to_page(pfn));
 
@@ -266,7 +266,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
  err_pfn_remap:
 	pgmap_array_delete(res);
  err_array:
-	pgmap->ops->kill(pgmap->ref);
+	pgmap->ops->kill(pgmap);
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(devm_memremap_pages);
diff --git a/mm/hmm.c b/mm/hmm.c
index 2501ac6045d0..c76a1b5defda 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1325,9 +1325,9 @@ static void hmm_devmem_ref_exit(void *data)
 	percpu_ref_exit(ref);
 }
 
-static void hmm_devmem_ref_kill(struct percpu_ref *ref)
+static void hmm_devmem_ref_kill(struct dev_pagemap *pgmap)
 {
-	percpu_ref_kill(ref);
+	percpu_ref_kill(pgmap->ref);
 }
 
 static vm_fault_t hmm_devmem_fault(struct vm_area_struct *vma,
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 7f5ece9b5011..ee07c4de2b35 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -104,11 +104,9 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);
 
-static void nfit_test_kill(void *_pgmap)
+static void nfit_test_kill(void *pgmap)
 {
-	struct dev_pagemap *pgmap = _pgmap;
-
-	pgmap->ops->kill(pgmap->ref);
+	pgmap->ops->kill(pgmap);
 }
 
 void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 09/22] memremap: lift the devmap_enable manipulation into devm_memremap_pages
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (7 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 08/22] memremap: pass a struct dev_pagemap to ->kill Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:34   ` Jason Gunthorpe
  2019-06-13  9:43 ` [PATCH 10/22] memremap: add a migrate callback to struct dev_pagemap_ops Christoph Hellwig
                   ` (14 subsequent siblings)
  23 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

Just check if there is a ->page_free operation set and take care of the
static key enable, as well as the put using device managed resources.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvdimm/pmem.c | 23 +++--------------
 include/linux/mm.h    | 10 --------
 kernel/memremap.c     | 59 +++++++++++++++++++++++++++----------------
 mm/hmm.c              |  2 --
 4 files changed, 41 insertions(+), 53 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index b9638c6553a1..66837eed6375 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -334,11 +334,6 @@ static void pmem_release_disk(void *__pmem)
 	put_disk(pmem->disk);
 }
 
-static void pmem_release_pgmap_ops(void *__pgmap)
-{
-	dev_pagemap_put_ops();
-}
-
 static void pmem_fsdax_page_free(struct page *page, void *data)
 {
 	wake_up_var(&page->_refcount);
@@ -353,16 +348,6 @@ static const struct dev_pagemap_ops pmem_legacy_pagemap_ops = {
 	.kill			= pmem_kill,
 };
 
-static int setup_pagemap_fsdax(struct device *dev, struct dev_pagemap *pgmap)
-{
-	dev_pagemap_get_ops();
-	if (devm_add_action_or_reset(dev, pmem_release_pgmap_ops, pgmap))
-		return -ENOMEM;
-	pgmap->type = MEMORY_DEVICE_FS_DAX;
-	pgmap->ops = &fsdax_pagemap_ops;
-	return 0;
-}
-
 static int pmem_attach_disk(struct device *dev,
 		struct nd_namespace_common *ndns)
 {
@@ -421,8 +406,8 @@ static int pmem_attach_disk(struct device *dev,
 	pmem->pfn_flags = PFN_DEV;
 	pmem->pgmap.ref = &q->q_usage_counter;
 	if (is_nd_pfn(dev)) {
-		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
-			return -ENOMEM;
+		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
+		pmem->pgmap.ops = &fsdax_pagemap_ops;
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pfn_sb = nd_pfn->pfn_sb;
 		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
@@ -434,8 +419,8 @@ static int pmem_attach_disk(struct device *dev,
 	} else if (pmem_should_map_pages(dev)) {
 		memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
 		pmem->pgmap.altmap_valid = false;
-		if (setup_pagemap_fsdax(dev, &pmem->pgmap))
-			return -ENOMEM;
+		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
+		pmem->pgmap.ops = &fsdax_pagemap_ops;
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pmem->pfn_flags |= PFN_MAP;
 		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834ac32b7..edcf2b821647 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -921,8 +921,6 @@ static inline bool is_zone_device_page(const struct page *page)
 #endif
 
 #ifdef CONFIG_DEV_PAGEMAP_OPS
-void dev_pagemap_get_ops(void);
-void dev_pagemap_put_ops(void);
 void __put_devmap_managed_page(struct page *page);
 DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
 static inline bool put_devmap_managed_page(struct page *page)
@@ -969,14 +967,6 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
 #endif /* CONFIG_PCI_P2PDMA */
 
 #else /* CONFIG_DEV_PAGEMAP_OPS */
-static inline void dev_pagemap_get_ops(void)
-{
-}
-
-static inline void dev_pagemap_put_ops(void)
-{
-}
-
 static inline bool put_devmap_managed_page(struct page *page)
 {
 	return false;
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 94b830b6eca5..6a3183cac764 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -17,6 +17,37 @@ static DEFINE_XARRAY(pgmap_array);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
 #define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
+#ifdef CONFIG_DEV_PAGEMAP_OPS
+DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
+EXPORT_SYMBOL(devmap_managed_key);
+static atomic_t devmap_enable;
+
+static void dev_pagemap_put_ops(void *data)
+{
+	if (atomic_dec_and_test(&devmap_enable))
+		static_branch_disable(&devmap_managed_key);
+}
+
+/*
+ * Toggle the static key for ->page_free() callbacks when dev_pagemap
+ * pages go idle.
+ */
+static int dev_pagemap_enable(struct device *dev)
+{
+	if (atomic_inc_return(&devmap_enable) == 1)
+		static_branch_enable(&devmap_managed_key);
+
+	if (devm_add_action_or_reset(dev, dev_pagemap_put_ops, NULL))
+		return -ENOMEM;
+	return 0;
+}
+#else
+static inline int dev_pagemap_enable(struct device *dev)
+{
+	return 0;
+}
+#endif /* CONFIG_DEV_PAGEMAP_OPS */
+
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
 vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
 		       unsigned long addr,
@@ -159,6 +190,12 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	if (!pgmap->ref || !pgmap->ops || !pgmap->ops->kill)
 		return ERR_PTR(-EINVAL);
 
+	if (pgmap->ops->page_free) {
+		error = dev_pagemap_enable(dev);
+		if (error)
+			return ERR_PTR(error);
+	}
+
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
 		- align_start;
@@ -316,28 +353,6 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 EXPORT_SYMBOL_GPL(get_dev_pagemap);
 
 #ifdef CONFIG_DEV_PAGEMAP_OPS
-DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
-EXPORT_SYMBOL(devmap_managed_key);
-static atomic_t devmap_enable;
-
-/*
- * Toggle the static key for ->page_free() callbacks when dev_pagemap
- * pages go idle.
- */
-void dev_pagemap_get_ops(void)
-{
-	if (atomic_inc_return(&devmap_enable) == 1)
-		static_branch_enable(&devmap_managed_key);
-}
-EXPORT_SYMBOL_GPL(dev_pagemap_get_ops);
-
-void dev_pagemap_put_ops(void)
-{
-	if (atomic_dec_and_test(&devmap_enable))
-		static_branch_disable(&devmap_managed_key);
-}
-EXPORT_SYMBOL_GPL(dev_pagemap_put_ops);
-
 void __put_devmap_managed_page(struct page *page)
 {
 	int count = page_ref_dec_return(page);
diff --git a/mm/hmm.c b/mm/hmm.c
index c76a1b5defda..6dc769feb2e1 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1378,8 +1378,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	void *result;
 	int ret;
 
-	dev_pagemap_get_ops();
-
 	devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL);
 	if (!devmem)
 		return ERR_PTR(-ENOMEM);
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 10/22] memremap: add a migrate callback to struct dev_pagemap_ops
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (8 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 09/22] memremap: lift the devmap_enable manipulation into devm_memremap_pages Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 23:42   ` Ralph Campbell
  2019-06-13  9:43 ` [PATCH 11/22] memremap: remove the data field in struct dev_pagemap Christoph Hellwig
                   ` (13 subsequent siblings)
  23 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

This replaces the hacky ->fault callback, which is currently directly
called from common code through a hmm specific data structure as an
exercise in layering violations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/hmm.h      |  6 ------
 include/linux/memremap.h |  6 ++++++
 include/linux/swapops.h  | 15 ---------------
 kernel/memremap.c        | 31 -------------------------------
 mm/hmm.c                 | 13 +++++--------
 mm/memory.c              |  9 ++-------
 6 files changed, 13 insertions(+), 67 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 5761a39221a6..3c9a59dbfdb8 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -658,11 +658,6 @@ struct hmm_devmem_ops {
  * chunk, as an optimization. It must, however, prioritize the faulting address
  * over all the others.
  */
-typedef vm_fault_t (*dev_page_fault_t)(struct vm_area_struct *vma,
-				unsigned long addr,
-				const struct page *page,
-				unsigned int flags,
-				pmd_t *pmdp);
 
 struct hmm_devmem {
 	struct completion		completion;
@@ -673,7 +668,6 @@ struct hmm_devmem {
 	struct dev_pagemap		pagemap;
 	const struct hmm_devmem_ops	*ops;
 	struct percpu_ref		ref;
-	dev_page_fault_t		page_fault;
 };
 
 /*
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 96a3a6d564ad..03a4099be701 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -75,6 +75,12 @@ struct dev_pagemap_ops {
 	 * Transition the percpu_ref in struct dev_pagemap to the dead state.
 	 */
 	void (*kill)(struct dev_pagemap *pgmap);
+
+	/*
+	 * Used for private (un-addressable) device memory only.  Must migrate
+	 * the page back to a CPU accessible page.
+	 */
+	vm_fault_t (*migrate)(struct vm_fault *vmf);
 };
 
 /**
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 4d961668e5fc..15bdb6fe71e5 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -129,12 +129,6 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 {
 	return pfn_to_page(swp_offset(entry));
 }
-
-vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
-		       unsigned long addr,
-		       swp_entry_t entry,
-		       unsigned int flags,
-		       pmd_t *pmdp);
 #else /* CONFIG_DEVICE_PRIVATE */
 static inline swp_entry_t make_device_private_entry(struct page *page, bool write)
 {
@@ -164,15 +158,6 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
 {
 	return NULL;
 }
-
-static inline vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
-				     unsigned long addr,
-				     swp_entry_t entry,
-				     unsigned int flags,
-				     pmd_t *pmdp)
-{
-	return VM_FAULT_SIGBUS;
-}
 #endif /* CONFIG_DEVICE_PRIVATE */
 
 #ifdef CONFIG_MIGRATION
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 6a3183cac764..7167e717647d 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -11,7 +11,6 @@
 #include <linux/types.h>
 #include <linux/wait_bit.h>
 #include <linux/xarray.h>
-#include <linux/hmm.h>
 
 static DEFINE_XARRAY(pgmap_array);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
@@ -48,36 +47,6 @@ static inline int dev_pagemap_enable(struct device *dev)
 }
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
 
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
-vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
-		       unsigned long addr,
-		       swp_entry_t entry,
-		       unsigned int flags,
-		       pmd_t *pmdp)
-{
-	struct page *page = device_private_entry_to_page(entry);
-	struct hmm_devmem *devmem;
-
-	devmem = container_of(page->pgmap, typeof(*devmem), pagemap);
-
-	/*
-	 * The page_fault() callback must migrate page back to system memory
-	 * so that CPU can access it. This might fail for various reasons
-	 * (device issue, device was unsafely unplugged, ...). When such
-	 * error conditions happen, the callback must return VM_FAULT_SIGBUS.
-	 *
-	 * Note that because memory cgroup charges are accounted to the device
-	 * memory, this should never fail because of memory restrictions (but
-	 * allocation of regular system page might still fail because we are
-	 * out of memory).
-	 *
-	 * There is a more in-depth description of what that callback can and
-	 * cannot do, in include/linux/memremap.h
-	 */
-	return devmem->page_fault(vma, addr, page, flags, pmdp);
-}
-#endif /* CONFIG_DEVICE_PRIVATE */
-
 static void pgmap_array_delete(struct resource *res)
 {
 	xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
diff --git a/mm/hmm.c b/mm/hmm.c
index 6dc769feb2e1..aab799677c7d 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1330,15 +1330,12 @@ static void hmm_devmem_ref_kill(struct dev_pagemap *pgmap)
 	percpu_ref_kill(pgmap->ref);
 }
 
-static vm_fault_t hmm_devmem_fault(struct vm_area_struct *vma,
-			    unsigned long addr,
-			    const struct page *page,
-			    unsigned int flags,
-			    pmd_t *pmdp)
+static vm_fault_t hmm_devmem_migrate(struct vm_fault *vmf)
 {
-	struct hmm_devmem *devmem = page->pgmap->data;
+	struct hmm_devmem *devmem = vmf->page->pgmap->data;
 
-	return devmem->ops->fault(devmem, vma, addr, page, flags, pmdp);
+	return devmem->ops->fault(devmem, vmf->vma, vmf->address, vmf->page,
+			vmf->flags, vmf->pmd);
 }
 
 static void hmm_devmem_free(struct page *page, void *data)
@@ -1351,6 +1348,7 @@ static void hmm_devmem_free(struct page *page, void *data)
 static const struct dev_pagemap_ops hmm_pagemap_ops = {
 	.page_free		= hmm_devmem_free,
 	.kill			= hmm_devmem_ref_kill,
+	.migrate		= hmm_devmem_migrate,
 };
 
 /*
@@ -1405,7 +1403,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
 	devmem->pfn_last = devmem->pfn_first +
 			   (resource_size(devmem->resource) >> PAGE_SHIFT);
-	devmem->page_fault = hmm_devmem_fault;
 
 	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
 	devmem->pagemap.res = *devmem->resource;
diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd0c317..cbf3cb598436 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2782,13 +2782,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			migration_entry_wait(vma->vm_mm, vmf->pmd,
 					     vmf->address);
 		} else if (is_device_private_entry(entry)) {
-			/*
-			 * For un-addressable device memory we call the pgmap
-			 * fault handler callback. The callback must migrate
-			 * the page back to some CPU accessible page.
-			 */
-			ret = device_private_entry_fault(vma, vmf->address, entry,
-						 vmf->flags, vmf->pmd);
+			vmf->page = device_private_entry_to_page(entry);
+			ret = page->pgmap->ops->migrate(vmf);
 		} else if (is_hwpoison_entry(entry)) {
 			ret = VM_FAULT_HWPOISON;
 		} else {
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 11/22] memremap: remove the data field in struct dev_pagemap
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (9 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 10/22] memremap: add a migrate callback to struct dev_pagemap_ops Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:37   ` Jason Gunthorpe
  2019-06-13  9:43 ` [PATCH 12/22] memremap: provide an optional internal refcount " Christoph Hellwig
                   ` (12 subsequent siblings)
  23 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

struct dev_pagemap is always embedded into a containing structure, so
there is no need to an additional private data field.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvdimm/pmem.c    | 2 +-
 include/linux/memremap.h | 3 +--
 kernel/memremap.c        | 2 +-
 mm/hmm.c                 | 9 +++++----
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 66837eed6375..847d1b2bc10e 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -334,7 +334,7 @@ static void pmem_release_disk(void *__pmem)
 	put_disk(pmem->disk);
 }
 
-static void pmem_fsdax_page_free(struct page *page, void *data)
+static void pmem_fsdax_page_free(struct page *page)
 {
 	wake_up_var(&page->_refcount);
 }
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 03a4099be701..75b80de6394a 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -69,7 +69,7 @@ struct dev_pagemap_ops {
 	 * reach 0 refcount unless there is a refcount bug. This allows the
 	 * device driver to implement its own memory management.)
 	 */
-	void (*page_free)(struct page *page, void *data);
+	void (*page_free)(struct page *page);
 
 	/*
 	 * Transition the percpu_ref in struct dev_pagemap to the dead state.
@@ -99,7 +99,6 @@ struct dev_pagemap {
 	struct resource res;
 	struct percpu_ref *ref;
 	struct device *dev;
-	void *data;
 	enum memory_type type;
 	u64 pci_p2pdma_bus_offset;
 	const struct dev_pagemap_ops *ops;
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 7167e717647d..5c94ad4f5783 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -337,7 +337,7 @@ void __put_devmap_managed_page(struct page *page)
 
 		mem_cgroup_uncharge(page);
 
-		page->pgmap->ops->page_free(page, page->pgmap->data);
+		page->pgmap->ops->page_free(page);
 	} else if (!count)
 		__put_page(page);
 }
diff --git a/mm/hmm.c b/mm/hmm.c
index aab799677c7d..ff0f9568922b 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1332,15 +1332,17 @@ static void hmm_devmem_ref_kill(struct dev_pagemap *pgmap)
 
 static vm_fault_t hmm_devmem_migrate(struct vm_fault *vmf)
 {
-	struct hmm_devmem *devmem = vmf->page->pgmap->data;
+	struct hmm_devmem *devmem =
+		container_of(vmf->page->pgmap, struct hmm_devmem, pagemap);
 
 	return devmem->ops->fault(devmem, vmf->vma, vmf->address, vmf->page,
 			vmf->flags, vmf->pmd);
 }
 
-static void hmm_devmem_free(struct page *page, void *data)
+static void hmm_devmem_free(struct page *page)
 {
-	struct hmm_devmem *devmem = data;
+	struct hmm_devmem *devmem =
+		container_of(page->pgmap, struct hmm_devmem, pagemap);
 
 	devmem->ops->free(devmem, page);
 }
@@ -1409,7 +1411,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
 	devmem->pagemap.ops = &hmm_pagemap_ops;
 	devmem->pagemap.altmap_valid = false;
 	devmem->pagemap.ref = &devmem->ref;
-	devmem->pagemap.data = devmem;
 
 	result = devm_memremap_pages(devmem->device, &devmem->pagemap);
 	if (IS_ERR(result))
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 12/22] memremap: provide an optional internal refcount in struct dev_pagemap
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (10 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 11/22] memremap: remove the data field in struct dev_pagemap Christoph Hellwig
@ 2019-06-13  9:43 ` " Christoph Hellwig
  2019-06-13  9:43 ` [PATCH 13/22] device-dax: use the dev_pagemap internal refcount Christoph Hellwig
                   ` (11 subsequent siblings)
  23 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

Provide an internal refcounting logic if no ->ref field is provided
in the pagemap passed into devm_memremap_pages so that callers don't
have to reinvent it poorly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/memremap.h          |  4 +++
 kernel/memremap.c                 | 60 ++++++++++++++++++++++++++-----
 tools/testing/nvdimm/test/iomap.c |  9 +++--
 3 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 75b80de6394a..b77ed00851ce 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -88,6 +88,8 @@ struct dev_pagemap_ops {
  * @altmap: pre-allocated/reserved memory for vmemmap allocations
  * @res: physical address range covered by @ref
  * @ref: reference count that pins the devm_memremap_pages() mapping
+ * @internal_ref: internal reference if @ref is not provided by the caller
+ * @done: completion for @internal_ref
  * @dev: host device of the mapping for debug
  * @data: private data pointer for page_free()
  * @type: memory type: see MEMORY_* in memory_hotplug.h
@@ -98,6 +100,8 @@ struct dev_pagemap {
 	bool altmap_valid;
 	struct resource res;
 	struct percpu_ref *ref;
+	struct percpu_ref internal_ref;
+	struct completion done;
 	struct device *dev;
 	enum memory_type type;
 	u64 pci_p2pdma_bus_offset;
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 5c94ad4f5783..edca4389da68 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -83,6 +83,14 @@ static unsigned long pfn_next(unsigned long pfn)
 #define for_each_device_pfn(pfn, map) \
 	for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
 
+static void dev_pagemap_kill(struct dev_pagemap *pgmap)
+{
+	if (pgmap->ops && pgmap->ops->kill)
+		pgmap->ops->kill(pgmap);
+	else
+		percpu_ref_kill(pgmap->ref);
+}
+
 static void devm_memremap_pages_release(void *data)
 {
 	struct dev_pagemap *pgmap = data;
@@ -92,7 +100,8 @@ static void devm_memremap_pages_release(void *data)
 	unsigned long pfn;
 	int nid;
 
-	pgmap->ops->kill(pgmap);
+	dev_pagemap_kill(pgmap);
+
 	for_each_device_pfn(pfn, pgmap)
 		put_page(pfn_to_page(pfn));
 
@@ -121,20 +130,37 @@ static void devm_memremap_pages_release(void *data)
 		      "%s: failed to free all reserved pages\n", __func__);
 }
 
+static void dev_pagemap_percpu_release(struct percpu_ref *ref)
+{
+	struct dev_pagemap *pgmap =
+		container_of(ref, struct dev_pagemap, internal_ref);
+
+	complete(&pgmap->done);
+}
+
+static void dev_pagemap_percpu_exit(void *data)
+{
+	struct dev_pagemap *pgmap = data;
+
+	wait_for_completion(&pgmap->done);
+	percpu_ref_exit(pgmap->ref);
+}
+
 /**
  * devm_memremap_pages - remap and provide memmap backing for the given resource
  * @dev: hosting device for @res
  * @pgmap: pointer to a struct dev_pagemap
  *
  * Notes:
- * 1/ At a minimum the res, ref and type and ops members of @pgmap must be
- *    initialized by the caller before passing it to this function
+ * 1/ At a minimum the res and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function
  *
  * 2/ The altmap field may optionally be initialized, in which case altmap_valid
  *    must be set to true
  *
- * 3/ pgmap->ref must be 'live' on entry and will be killed at
- *    devm_memremap_pages_release() time, or if this routine fails.
+ * 3/ The ref field may optionally be provided, in which pgmap->ref must be
+ *    'live' on entry and will be killed at devm_memremap_pages_release() time,
+ *    or if this routine fails.
  *
  * 4/ res is expected to be a host memory range that could feasibly be
  *    treated as a "System RAM" range, i.e. not a device mmio range, but
@@ -156,10 +182,26 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	pgprot_t pgprot = PAGE_KERNEL;
 	int error, nid, is_ram;
 
-	if (!pgmap->ref || !pgmap->ops || !pgmap->ops->kill)
-		return ERR_PTR(-EINVAL);
+	if (!pgmap->ref) {
+		if (pgmap->ops && pgmap->ops->kill)
+			return ERR_PTR(-EINVAL);
+
+		init_completion(&pgmap->done);
+		error = percpu_ref_init(&pgmap->internal_ref,
+				dev_pagemap_percpu_release, 0, GFP_KERNEL);
+		if (error)
+			return ERR_PTR(error);
+		pgmap->ref = &pgmap->internal_ref;
+		error = devm_add_action_or_reset(dev, dev_pagemap_percpu_exit,
+				pgmap);
+		if (error)
+			return ERR_PTR(error);
+	} else {
+		if (!pgmap->ops || !pgmap->ops->kill)
+			return ERR_PTR(-EINVAL);
+	}
 
-	if (pgmap->ops->page_free) {
+	if (pgmap->ops && pgmap->ops->page_free) {
 		error = dev_pagemap_enable(dev);
 		if (error)
 			return ERR_PTR(error);
@@ -272,7 +314,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
  err_pfn_remap:
 	pgmap_array_delete(res);
  err_array:
-	pgmap->ops->kill(pgmap);
+	dev_pagemap_kill(pgmap);
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL_GPL(devm_memremap_pages);
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index ee07c4de2b35..3d0e916f9fff 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -104,9 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);
 
-static void nfit_test_kill(void *pgmap)
+static void nfit_test_kill(void *data)
 {
-	pgmap->ops->kill(pgmap);
+	struct dev_pagemap *pgmap = data;
+
+	if (pgmap->ops && pgmap->ops->kill)
+		pgmap->ops->kill(pgmap);
+	else
+		percpu_ref_kill(pgmap->ref);
 }
 
 void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 13/22] device-dax: use the dev_pagemap internal refcount
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (11 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 12/22] memremap: provide an optional internal refcount " Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-14  0:22   ` Ira Weiny
  2019-06-13  9:43 ` [PATCH 14/22] nouveau: use alloc_page_vma directly Christoph Hellwig
                   ` (10 subsequent siblings)
  23 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

The functionality is identical to the one currently open coded in
device-dax.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/dax/dax-private.h |  4 ---
 drivers/dax/device.c      | 52 +--------------------------------------
 2 files changed, 1 insertion(+), 55 deletions(-)

diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index a45612148ca0..ed04a18a35be 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -51,8 +51,6 @@ struct dax_region {
  * @target_node: effective numa node if dev_dax memory range is onlined
  * @dev - device core
  * @pgmap - pgmap for memmap setup / lifetime (driver owned)
- * @ref: pgmap reference count (driver owned)
- * @cmp: @ref final put completion (driver owned)
  */
 struct dev_dax {
 	struct dax_region *region;
@@ -60,8 +58,6 @@ struct dev_dax {
 	int target_node;
 	struct device dev;
 	struct dev_pagemap pgmap;
-	struct percpu_ref ref;
-	struct completion cmp;
 };
 
 static inline struct dev_dax *to_dev_dax(struct device *dev)
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index e23fa1bd8c97..a9d7c90ecf1e 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -14,37 +14,6 @@
 #include "dax-private.h"
 #include "bus.h"
 
-static struct dev_dax *ref_to_dev_dax(struct percpu_ref *ref)
-{
-	return container_of(ref, struct dev_dax, ref);
-}
-
-static void dev_dax_percpu_release(struct percpu_ref *ref)
-{
-	struct dev_dax *dev_dax = ref_to_dev_dax(ref);
-
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
-	complete(&dev_dax->cmp);
-}
-
-static void dev_dax_percpu_exit(void *data)
-{
-	struct percpu_ref *ref = data;
-	struct dev_dax *dev_dax = ref_to_dev_dax(ref);
-
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
-	wait_for_completion(&dev_dax->cmp);
-	percpu_ref_exit(ref);
-}
-
-static void dev_dax_percpu_kill(struct dev_pagemap *pgmap)
-{
-	struct dev_dax *dev_dax = container_of(pgmap, struct dev_dax, pgmap);
-
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
-	percpu_ref_kill(pgmap->ref);
-}
-
 static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
 		const char *func)
 {
@@ -442,10 +411,6 @@ static void dev_dax_kill(void *dev_dax)
 	kill_dev_dax(dev_dax);
 }
 
-static const struct dev_pagemap_ops dev_dax_pagemap_ops = {
-	.kill		= dev_dax_percpu_kill,
-};
-
 int dev_dax_probe(struct device *dev)
 {
 	struct dev_dax *dev_dax = to_dev_dax(dev);
@@ -463,24 +428,9 @@ int dev_dax_probe(struct device *dev)
 		return -EBUSY;
 	}
 
-	init_completion(&dev_dax->cmp);
-	rc = percpu_ref_init(&dev_dax->ref, dev_dax_percpu_release, 0,
-			GFP_KERNEL);
-	if (rc)
-		return rc;
-
-	rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref);
-	if (rc)
-		return rc;
-
-	dev_dax->pgmap.ref = &dev_dax->ref;
-	dev_dax->pgmap.ops = &dev_dax_pagemap_ops;
 	addr = devm_memremap_pages(dev, &dev_dax->pgmap);
-	if (IS_ERR(addr)) {
-		devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref);
-		percpu_ref_exit(&dev_dax->ref);
+	if (IS_ERR(addr))
 		return PTR_ERR(addr);
-	}
 
 	inode = dax_inode(dax_dev);
 	cdev = inode->i_cdev;
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 14/22] nouveau: use alloc_page_vma directly
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (12 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 13/22] device-dax: use the dev_pagemap internal refcount Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:39   ` Jason Gunthorpe
  2019-06-13  9:43 ` [PATCH 15/22] nouveau: use devm_memremap_pages directly Christoph Hellwig
                   ` (9 subsequent siblings)
  23 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

hmm_vma_alloc_locked_page is scheduled to go away, use the proper
mm function directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 40c47d6a7d78..a50f6fd2fe24 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -148,11 +148,12 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
 		if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE))
 			continue;
 
-		dpage = hmm_vma_alloc_locked_page(vma, addr);
+		dpage = alloc_page_vma(GFP_HIGHUSER, vma, addr);
 		if (!dpage) {
 			dst_pfns[i] = MIGRATE_PFN_ERROR;
 			continue;
 		}
+		lock_page(dpage);
 
 		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
 			      MIGRATE_PFN_LOCKED;
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 15/22] nouveau: use devm_memremap_pages directly
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (13 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 14/22] nouveau: use alloc_page_vma directly Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13  9:43 ` [PATCH 16/22] mm: remove hmm_vma_alloc_locked_page Christoph Hellwig
                   ` (8 subsequent siblings)
  23 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

Just use devm_memremap_pages instead of hmm_devmem_add pages to allow
killing that wrapper which doesn't provide a whole lot of benefits.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 80 ++++++++++++--------------
 1 file changed, 38 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index a50f6fd2fe24..9e32bc8ecbc7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -72,7 +72,8 @@ struct nouveau_dmem_migrate {
 };
 
 struct nouveau_dmem {
-	struct hmm_devmem *devmem;
+	struct nouveau_drm *drm;
+	struct dev_pagemap pagemap;
 	struct nouveau_dmem_migrate migrate;
 	struct list_head chunk_free;
 	struct list_head chunk_full;
@@ -80,6 +81,11 @@ struct nouveau_dmem {
 	struct mutex mutex;
 };
 
+static inline struct nouveau_dmem *page_to_dmem(struct page *page)
+{
+	return container_of(page->pgmap, struct nouveau_dmem, pagemap);
+}
+
 struct nouveau_dmem_fault {
 	struct nouveau_drm *drm;
 	struct nouveau_fence *fence;
@@ -96,8 +102,7 @@ struct nouveau_migrate {
 	unsigned long dma_nr;
 };
 
-static void
-nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page)
+static void nouveau_dmem_page_free(struct page *page)
 {
 	struct nouveau_dmem_chunk *chunk;
 	unsigned long idx;
@@ -260,29 +265,21 @@ static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
 	.finalize_and_map	= nouveau_dmem_fault_finalize_and_map,
 };
 
-static vm_fault_t
-nouveau_dmem_fault(struct hmm_devmem *devmem,
-		   struct vm_area_struct *vma,
-		   unsigned long addr,
-		   const struct page *page,
-		   unsigned int flags,
-		   pmd_t *pmdp)
+static vm_fault_t nouveau_dmem_devmem_migrate(struct vm_fault *vmf)
 {
-	struct drm_device *drm_dev = dev_get_drvdata(devmem->device);
+	struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
 	unsigned long src[1] = {0}, dst[1] = {0};
-	struct nouveau_dmem_fault fault = {0};
+	struct nouveau_dmem_fault fault = { .drm = dmem->drm };
 	int ret;
 
-
-
 	/*
 	 * FIXME what we really want is to find some heuristic to migrate more
 	 * than just one page on CPU fault. When such fault happens it is very
 	 * likely that more surrounding page will CPU fault too.
 	 */
-	fault.drm = nouveau_drm(drm_dev);
-	ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vma, addr,
-			  addr + PAGE_SIZE, src, dst, &fault);
+	ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma,
+			vmf->address, vmf->address + PAGE_SIZE,
+			src, dst, &fault);
 	if (ret)
 		return VM_FAULT_SIGBUS;
 
@@ -292,10 +289,9 @@ nouveau_dmem_fault(struct hmm_devmem *devmem,
 	return 0;
 }
 
-static const struct hmm_devmem_ops
-nouveau_dmem_devmem_ops = {
-	.free = nouveau_dmem_free,
-	.fault = nouveau_dmem_fault,
+static const struct dev_pagemap_ops nouveau_dmem_pagemap_ops = {
+	.page_free		= nouveau_dmem_page_free,
+	.migrate		= nouveau_dmem_devmem_migrate,
 };
 
 static int
@@ -581,7 +577,8 @@ void
 nouveau_dmem_init(struct nouveau_drm *drm)
 {
 	struct device *device = drm->dev->dev;
-	unsigned long i, size;
+	struct resource *res;
+	unsigned long i, size, pfn_first;
 	int ret;
 
 	/* This only make sense on PASCAL or newer */
@@ -591,6 +588,7 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 	if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL)))
 		return;
 
+	drm->dmem->drm = drm;
 	mutex_init(&drm->dmem->mutex);
 	INIT_LIST_HEAD(&drm->dmem->chunk_free);
 	INIT_LIST_HEAD(&drm->dmem->chunk_full);
@@ -600,11 +598,8 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 
 	/* Initialize migration dma helpers before registering memory */
 	ret = nouveau_dmem_migrate_init(drm);
-	if (ret) {
-		kfree(drm->dmem);
-		drm->dmem = NULL;
-		return;
-	}
+	if (ret)
+		goto out_free;
 
 	/*
 	 * FIXME we need some kind of policy to decide how much VRAM we
@@ -612,14 +607,16 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 	 * and latter if we want to do thing like over commit then we
 	 * could revisit this.
 	 */
-	drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops,
-					   device, size);
-	if (IS_ERR(drm->dmem->devmem)) {
-		kfree(drm->dmem);
-		drm->dmem = NULL;
-		return;
-	}
-
+	res = devm_request_free_mem_region(device, &iomem_resource, size);
+	if (IS_ERR(res))
+		goto out_free;
+	drm->dmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
+	drm->dmem->pagemap.res = *res;
+	drm->dmem->pagemap.ops = &nouveau_dmem_pagemap_ops;
+	if (IS_ERR(devm_memremap_pages(device, &drm->dmem->pagemap)))
+		goto out_free;
+
+	pfn_first = res->start >> PAGE_SHIFT;
 	for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) {
 		struct nouveau_dmem_chunk *chunk;
 		struct page *page;
@@ -632,8 +629,7 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 		}
 
 		chunk->drm = drm;
-		chunk->pfn_first = drm->dmem->devmem->pfn_first;
-		chunk->pfn_first += (i * DMEM_CHUNK_NPAGES);
+		chunk->pfn_first = pfn_first + (i * DMEM_CHUNK_NPAGES);
 		list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
 
 		page = pfn_to_page(chunk->pfn_first);
@@ -643,6 +639,10 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 	}
 
 	NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
+	return;
+out_free:
+	kfree(drm->dmem);
+	drm->dmem = NULL;
 }
 
 static void
@@ -835,11 +835,7 @@ nouveau_dmem_page(struct nouveau_drm *drm, struct page *page)
 {
 	if (!is_device_private_page(page))
 		return false;
-
-	if (drm->dmem->devmem != page->pgmap->data)
-		return false;
-
-	return true;
+	return drm->dmem == page_to_dmem(page);
 }
 
 void
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 16/22] mm: remove hmm_vma_alloc_locked_page
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (14 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 15/22] nouveau: use devm_memremap_pages directly Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13  9:43 ` [PATCH 17/22] mm: remove hmm_devmem_add Christoph Hellwig
                   ` (7 subsequent siblings)
  23 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

The only user of it has just been removed, and there wasn't really any need
to wrap a basic memory allocator to start with.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/hmm.h |  3 ---
 mm/hmm.c            | 14 --------------
 2 files changed, 17 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 3c9a59dbfdb8..0e61d830b0a9 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -553,9 +553,6 @@ static inline void hmm_mm_init(struct mm_struct *mm) {}
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 struct hmm_devmem;
 
-struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
-				       unsigned long addr);
-
 /*
  * struct hmm_devmem_ops - callback for ZONE_DEVICE memory events
  *
diff --git a/mm/hmm.c b/mm/hmm.c
index ff0f9568922b..c15283f9bbf0 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1293,20 +1293,6 @@ EXPORT_SYMBOL(hmm_range_dma_unmap);
 
 
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
-				       unsigned long addr)
-{
-	struct page *page;
-
-	page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
-	if (!page)
-		return NULL;
-	lock_page(page);
-	return page;
-}
-EXPORT_SYMBOL(hmm_vma_alloc_locked_page);
-
-
 static void hmm_devmem_ref_release(struct percpu_ref *ref)
 {
 	struct hmm_devmem *devmem;
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 17/22] mm: remove hmm_devmem_add
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (15 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 16/22] mm: remove hmm_vma_alloc_locked_page Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:42   ` Jason Gunthorpe
  2019-06-13  9:43 ` [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken Christoph Hellwig
                   ` (6 subsequent siblings)
  23 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

There isn't really much value add in the hmm_devmem_add wrapper.  Just
factor out a little helper to find the resource, and otherwise let the
driver implement the dev_pagemap_ops directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/vm/hmm.rst |  26 --------
 include/linux/hmm.h      | 129 ---------------------------------------
 mm/hmm.c                 | 115 ----------------------------------
 3 files changed, 270 deletions(-)

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 7b6eeda5a7c0..b1c960fe246d 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -336,32 +336,6 @@ directly using struct page for device memory which left most kernel code paths
 unaware of the difference. We only need to make sure that no one ever tries to
 map those pages from the CPU side.
 
-HMM provides a set of helpers to register and hotplug device memory as a new
-region needing a struct page. This is offered through a very simple API::
-
- struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
-                                   struct device *device,
-                                   unsigned long size);
- void hmm_devmem_remove(struct hmm_devmem *devmem);
-
-The hmm_devmem_ops is where most of the important things are::
-
- struct hmm_devmem_ops {
-     void (*free)(struct hmm_devmem *devmem, struct page *page);
-     vm_fault_t (*fault)(struct hmm_devmem *devmem,
-                  struct vm_area_struct *vma,
-                  unsigned long addr,
-                  struct page *page,
-                  unsigned flags,
-                  pmd_t *pmdp);
- };
-
-The first callback (free()) happens when the last reference on a device page is
-dropped. This means the device page is now free and no longer used by anyone.
-The second callback happens whenever the CPU tries to access a device page
-which it cannot do. This second callback must trigger a migration back to
-system memory.
-
 
 Migration to and from device memory
 ===================================
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 0e61d830b0a9..13152ab504ec 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -551,135 +551,6 @@ static inline void hmm_mm_init(struct mm_struct *mm) {}
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-struct hmm_devmem;
-
-/*
- * struct hmm_devmem_ops - callback for ZONE_DEVICE memory events
- *
- * @free: call when refcount on page reach 1 and thus is no longer use
- * @fault: call when there is a page fault to unaddressable memory
- *
- * Both callback happens from page_free() and page_fault() callback of struct
- * dev_pagemap respectively. See include/linux/memremap.h for more details on
- * those.
- *
- * The hmm_devmem_ops callback are just here to provide a coherent and
- * uniq API to device driver and device driver should not register their
- * own page_free() or page_fault() but rely on the hmm_devmem_ops call-
- * back.
- */
-struct hmm_devmem_ops {
-	/*
-	 * free() - free a device page
-	 * @devmem: device memory structure (see struct hmm_devmem)
-	 * @page: pointer to struct page being freed
-	 *
-	 * Call back occurs whenever a device page refcount reach 1 which
-	 * means that no one is holding any reference on the page anymore
-	 * (ZONE_DEVICE page have an elevated refcount of 1 as default so
-	 * that they are not release to the general page allocator).
-	 *
-	 * Note that callback has exclusive ownership of the page (as no
-	 * one is holding any reference).
-	 */
-	void (*free)(struct hmm_devmem *devmem, struct page *page);
-	/*
-	 * fault() - CPU page fault or get user page (GUP)
-	 * @devmem: device memory structure (see struct hmm_devmem)
-	 * @vma: virtual memory area containing the virtual address
-	 * @addr: virtual address that faulted or for which there is a GUP
-	 * @page: pointer to struct page backing virtual address (unreliable)
-	 * @flags: FAULT_FLAG_* (see include/linux/mm.h)
-	 * @pmdp: page middle directory
-	 * Return: VM_FAULT_MINOR/MAJOR on success or one of VM_FAULT_ERROR
-	 *   on error
-	 *
-	 * The callback occurs whenever there is a CPU page fault or GUP on a
-	 * virtual address. This means that the device driver must migrate the
-	 * page back to regular memory (CPU accessible).
-	 *
-	 * The device driver is free to migrate more than one page from the
-	 * fault() callback as an optimization. However if the device decides
-	 * to migrate more than one page it must always priotirize the faulting
-	 * address over the others.
-	 *
-	 * The struct page pointer is only given as a hint to allow quick
-	 * lookup of internal device driver data. A concurrent migration
-	 * might have already freed that page and the virtual address might
-	 * no longer be backed by it. So it should not be modified by the
-	 * callback.
-	 *
-	 * Note that mmap semaphore is held in read mode at least when this
-	 * callback occurs, hence the vma is valid upon callback entry.
-	 */
-	vm_fault_t (*fault)(struct hmm_devmem *devmem,
-		     struct vm_area_struct *vma,
-		     unsigned long addr,
-		     const struct page *page,
-		     unsigned int flags,
-		     pmd_t *pmdp);
-};
-
-/*
- * struct hmm_devmem - track device memory
- *
- * @completion: completion object for device memory
- * @pfn_first: first pfn for this resource (set by hmm_devmem_add())
- * @pfn_last: last pfn for this resource (set by hmm_devmem_add())
- * @resource: IO resource reserved for this chunk of memory
- * @pagemap: device page map for that chunk
- * @device: device to bind resource to
- * @ops: memory operations callback
- * @ref: per CPU refcount
- * @page_fault: callback when CPU fault on an unaddressable device page
- *
- * This is a helper structure for device drivers that do not wish to implement
- * the gory details related to hotplugging new memoy and allocating struct
- * pages.
- *
- * Device drivers can directly use ZONE_DEVICE memory on their own if they
- * wish to do so.
- *
- * The page_fault() callback must migrate page back, from device memory to
- * system memory, so that the CPU can access it. This might fail for various
- * reasons (device issues,  device have been unplugged, ...). When such error
- * conditions happen, the page_fault() callback must return VM_FAULT_SIGBUS and
- * set the CPU page table entry to "poisoned".
- *
- * Note that because memory cgroup charges are transferred to the device memory,
- * this should never fail due to memory restrictions. However, allocation
- * of a regular system page might still fail because we are out of memory. If
- * that happens, the page_fault() callback must return VM_FAULT_OOM.
- *
- * The page_fault() callback can also try to migrate back multiple pages in one
- * chunk, as an optimization. It must, however, prioritize the faulting address
- * over all the others.
- */
-
-struct hmm_devmem {
-	struct completion		completion;
-	unsigned long			pfn_first;
-	unsigned long			pfn_last;
-	struct resource			*resource;
-	struct device			*device;
-	struct dev_pagemap		pagemap;
-	const struct hmm_devmem_ops	*ops;
-	struct percpu_ref		ref;
-};
-
-/*
- * To add (hotplug) device memory, HMM assumes that there is no real resource
- * that reserves a range in the physical address space (this is intended to be
- * use by unaddressable device memory). It will reserve a physical range big
- * enough and allocate struct page for it.
- *
- * The device driver can wrap the hmm_devmem struct inside a private device
- * driver struct.
- */
-struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
-				  struct device *device,
-				  unsigned long size);
-
 /*
  * hmm_devmem_page_set_drvdata - set per-page driver data field
  *
diff --git a/mm/hmm.c b/mm/hmm.c
index c15283f9bbf0..5b2e9bb6063a 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1290,118 +1290,3 @@ long hmm_range_dma_unmap(struct hmm_range *range,
 }
 EXPORT_SYMBOL(hmm_range_dma_unmap);
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-
-
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-static void hmm_devmem_ref_release(struct percpu_ref *ref)
-{
-	struct hmm_devmem *devmem;
-
-	devmem = container_of(ref, struct hmm_devmem, ref);
-	complete(&devmem->completion);
-}
-
-static void hmm_devmem_ref_exit(void *data)
-{
-	struct percpu_ref *ref = data;
-	struct hmm_devmem *devmem;
-
-	devmem = container_of(ref, struct hmm_devmem, ref);
-	wait_for_completion(&devmem->completion);
-	percpu_ref_exit(ref);
-}
-
-static void hmm_devmem_ref_kill(struct dev_pagemap *pgmap)
-{
-	percpu_ref_kill(pgmap->ref);
-}
-
-static vm_fault_t hmm_devmem_migrate(struct vm_fault *vmf)
-{
-	struct hmm_devmem *devmem =
-		container_of(vmf->page->pgmap, struct hmm_devmem, pagemap);
-
-	return devmem->ops->fault(devmem, vmf->vma, vmf->address, vmf->page,
-			vmf->flags, vmf->pmd);
-}
-
-static void hmm_devmem_free(struct page *page)
-{
-	struct hmm_devmem *devmem =
-		container_of(page->pgmap, struct hmm_devmem, pagemap);
-
-	devmem->ops->free(devmem, page);
-}
-
-static const struct dev_pagemap_ops hmm_pagemap_ops = {
-	.page_free		= hmm_devmem_free,
-	.kill			= hmm_devmem_ref_kill,
-	.migrate		= hmm_devmem_migrate,
-};
-
-/*
- * hmm_devmem_add() - hotplug ZONE_DEVICE memory for device memory
- *
- * @ops: memory event device driver callback (see struct hmm_devmem_ops)
- * @device: device struct to bind the resource too
- * @size: size in bytes of the device memory to add
- * Return: pointer to new hmm_devmem struct ERR_PTR otherwise
- *
- * This function first finds an empty range of physical address big enough to
- * contain the new resource, and then hotplugs it as ZONE_DEVICE memory, which
- * in turn allocates struct pages. It does not do anything beyond that; all
- * events affecting the memory will go through the various callbacks provided
- * by hmm_devmem_ops struct.
- *
- * Device driver should call this function during device initialization and
- * is then responsible of memory management. HMM only provides helpers.
- */
-struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
-				  struct device *device,
-				  unsigned long size)
-{
-	struct hmm_devmem *devmem;
-	void *result;
-	int ret;
-
-	devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL);
-	if (!devmem)
-		return ERR_PTR(-ENOMEM);
-
-	init_completion(&devmem->completion);
-	devmem->pfn_first = -1UL;
-	devmem->pfn_last = -1UL;
-	devmem->resource = NULL;
-	devmem->device = device;
-	devmem->ops = ops;
-
-	ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release,
-			      0, GFP_KERNEL);
-	if (ret)
-		return ERR_PTR(ret);
-
-	ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit, &devmem->ref);
-	if (ret)
-		return ERR_PTR(ret);
-
-	devmem->resource = devm_request_free_mem_region(device, &iomem_resource,
-			size);
-	if (IS_ERR(devmem->resource))
-		return ERR_CAST(devmem->resource);
-	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
-	devmem->pfn_last = devmem->pfn_first +
-			   (resource_size(devmem->resource) >> PAGE_SHIFT);
-
-	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
-	devmem->pagemap.res = *devmem->resource;
-	devmem->pagemap.ops = &hmm_pagemap_ops;
-	devmem->pagemap.altmap_valid = false;
-	devmem->pagemap.ref = &devmem->ref;
-
-	result = devm_memremap_pages(devmem->device, &devmem->pagemap);
-	if (IS_ERR(result))
-		return result;
-	return devmem;
-}
-EXPORT_SYMBOL_GPL(hmm_devmem_add);
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (16 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 17/22] mm: remove hmm_devmem_add Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:44   ` Jason Gunthorpe
  2019-06-20 19:26   ` Michal Hocko
  2019-06-13  9:43 ` [PATCH 19/22] mm: simplify ZONE_DEVICE page private data Christoph Hellwig
                   ` (5 subsequent siblings)
  23 siblings, 2 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

The code hasn't been used since it was added to the tree, and doesn't
appear to actually be usable.  Mark it as BROKEN until either a user
comes along or we finally give up on it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/Kconfig b/mm/Kconfig
index 0d2ba7e1f43e..406fa45e9ecc 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -721,6 +721,7 @@ config DEVICE_PRIVATE
 config DEVICE_PUBLIC
 	bool "Addressable device memory (like GPU memory)"
 	depends on ARCH_HAS_HMM
+	depends on BROKEN
 	select HMM
 	select DEV_PAGEMAP_OPS
 
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 19/22] mm: simplify ZONE_DEVICE page private data
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (17 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13  9:43 ` [PATCH 20/22] mm: sort out the DEVICE_PRIVATE Kconfig mess Christoph Hellwig
                   ` (4 subsequent siblings)
  23 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

Remove the clumsy hmm_devmem_page_{get,set}_drvdata helpers, and
instead just access the page directly.  Also make the page data
a void pointer, and thus much easier to use.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 18 +++++++----------
 include/linux/hmm.h                    | 27 --------------------------
 include/linux/mm_types.h               |  2 +-
 mm/page_alloc.c                        |  8 ++++----
 4 files changed, 12 insertions(+), 43 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 9e32bc8ecbc7..27aa4e72abe9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -104,11 +104,8 @@ struct nouveau_migrate {
 
 static void nouveau_dmem_page_free(struct page *page)
 {
-	struct nouveau_dmem_chunk *chunk;
-	unsigned long idx;
-
-	chunk = (void *)hmm_devmem_page_get_drvdata(page);
-	idx = page_to_pfn(page) - chunk->pfn_first;
+	struct nouveau_dmem_chunk *chunk = page->zone_device_data;
+	unsigned long idx = page_to_pfn(page) - chunk->pfn_first;
 
 	/*
 	 * FIXME:
@@ -200,7 +197,7 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
 
 		dst_addr = fault->dma[fault->npages++];
 
-		chunk = (void *)hmm_devmem_page_get_drvdata(spage);
+		chunk = spage->zone_device_data;
 		src_addr = page_to_pfn(spage) - chunk->pfn_first;
 		src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset;
 
@@ -633,9 +630,8 @@ nouveau_dmem_init(struct nouveau_drm *drm)
 		list_add_tail(&chunk->list, &drm->dmem->chunk_empty);
 
 		page = pfn_to_page(chunk->pfn_first);
-		for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) {
-			hmm_devmem_page_set_drvdata(page, (long)chunk);
-		}
+		for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page)
+			page->zone_device_data = chunk;
 	}
 
 	NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20);
@@ -698,7 +694,7 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
 		if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR)
 			continue;
 
-		chunk = (void *)hmm_devmem_page_get_drvdata(dpage);
+		chunk = dpage->zone_device_data;
 		dst_addr = page_to_pfn(dpage) - chunk->pfn_first;
 		dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset;
 
@@ -864,7 +860,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
 			continue;
 		}
 
-		chunk = (void *)hmm_devmem_page_get_drvdata(page);
+		chunk = page->zone_device_data;
 		addr = page_to_pfn(page) - chunk->pfn_first;
 		addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT;
 
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 13152ab504ec..e095a8b55dfa 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -550,33 +550,6 @@ static inline void hmm_mm_init(struct mm_struct *mm)
 static inline void hmm_mm_init(struct mm_struct *mm) {}
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
-#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) ||  IS_ENABLED(CONFIG_DEVICE_PUBLIC)
-/*
- * hmm_devmem_page_set_drvdata - set per-page driver data field
- *
- * @page: pointer to struct page
- * @data: driver data value to set
- *
- * Because page can not be on lru we have an unsigned long that driver can use
- * to store a per page field. This just a simple helper to do that.
- */
-static inline void hmm_devmem_page_set_drvdata(struct page *page,
-					       unsigned long data)
-{
-	page->hmm_data = data;
-}
-
-/*
- * hmm_devmem_page_get_drvdata - get per page driver data field
- *
- * @page: pointer to struct page
- * Return: driver data value
- */
-static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page)
-{
-	return page->hmm_data;
-}
-#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 #else /* IS_ENABLED(CONFIG_HMM) */
 static inline void hmm_mm_destroy(struct mm_struct *mm) {}
 static inline void hmm_mm_init(struct mm_struct *mm) {}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8ec38b11b361..f33a1289c101 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -158,7 +158,7 @@ struct page {
 		struct {	/* ZONE_DEVICE pages */
 			/** @pgmap: Points to the hosting device page map. */
 			struct dev_pagemap *pgmap;
-			unsigned long hmm_data;
+			void *zone_device_data;
 			unsigned long _zd_pad_1;	/* uses mapping */
 		};
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d66bc8abe0af..d069ee1f4c2e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5887,12 +5887,12 @@ void __ref memmap_init_zone_device(struct zone *zone,
 		__SetPageReserved(page);
 
 		/*
-		 * ZONE_DEVICE pages union ->lru with a ->pgmap back
-		 * pointer and hmm_data.  It is a bug if a ZONE_DEVICE
-		 * page is ever freed or placed on a driver-private list.
+		 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
+		 * and zone_device_data.  It is a bug if a ZONE_DEVICE page is
+		 * ever freed or placed on a driver-private list.
 		 */
 		page->pgmap = pgmap;
-		page->hmm_data = 0;
+		page->zone_device_data = 0;
 
 		/*
 		 * Mark the block movable so that blocks are reserved for
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 20/22] mm: sort out the DEVICE_PRIVATE Kconfig mess
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (18 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 19/22] mm: simplify ZONE_DEVICE page private data Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 19:55   ` Jason Gunthorpe
  2019-06-13  9:43 ` [PATCH 21/22] mm: remove the HMM config option Christoph Hellwig
                   ` (3 subsequent siblings)
  23 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

The ZONE_DEVICE support doesn't depend on anything HMM related, just on
various bits of arch support as indicated by the architecture.  Also
don't select the option from nouveau as it isn't present in many setups,
and depend on it instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/gpu/drm/nouveau/Kconfig | 2 +-
 mm/Kconfig                      | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index dba2613f7180..6303d203ab1d 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -85,10 +85,10 @@ config DRM_NOUVEAU_BACKLIGHT
 config DRM_NOUVEAU_SVM
 	bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
 	depends on ARCH_HAS_HMM
+	depends on DEVICE_PRIVATE
 	depends on DRM_NOUVEAU
 	depends on STAGING
 	select HMM_MIRROR
-	select DEVICE_PRIVATE
 	default n
 	help
 	  Say Y here if you want to enable experimental support for
diff --git a/mm/Kconfig b/mm/Kconfig
index 406fa45e9ecc..4dbd718c8cf4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -677,13 +677,13 @@ config ARCH_HAS_HMM_MIRROR
 
 config ARCH_HAS_HMM
 	bool
-	default y
 	depends on (X86_64 || PPC64)
 	depends on ZONE_DEVICE
 	depends on MMU && 64BIT
 	depends on MEMORY_HOTPLUG
 	depends on MEMORY_HOTREMOVE
 	depends on SPARSEMEM_VMEMMAP
+	default y
 
 config MIGRATE_VMA_HELPER
 	bool
@@ -709,8 +709,7 @@ config HMM_MIRROR
 
 config DEVICE_PRIVATE
 	bool "Unaddressable device memory (GPU memory, ...)"
-	depends on ARCH_HAS_HMM
-	select HMM
+	depends on ZONE_DEVICE
 	select DEV_PAGEMAP_OPS
 
 	help
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 21/22] mm: remove the HMM config option
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (19 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 20/22] mm: sort out the DEVICE_PRIVATE Kconfig mess Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 20:01   ` Jason Gunthorpe
  2019-06-13  9:43 ` [PATCH 22/22] mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR Christoph Hellwig
                   ` (2 subsequent siblings)
  23 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

All the mm/hmm.c code is better keyed off HMM_MIRROR.  Also let nouveau
depend on it instead of the mix of a dummy dependency symbol plus the
actually selected one.  Drop various odd dependencies, as the code is
pretty portable.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/gpu/drm/nouveau/Kconfig |  3 +--
 include/linux/hmm.h             | 14 +++-----------
 include/linux/mm_types.h        |  2 +-
 mm/Kconfig                      | 27 +++------------------------
 mm/Makefile                     |  2 +-
 mm/hmm.c                        |  2 --
 6 files changed, 9 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index 6303d203ab1d..66c839d8e9d1 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -84,11 +84,10 @@ config DRM_NOUVEAU_BACKLIGHT
 
 config DRM_NOUVEAU_SVM
 	bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
-	depends on ARCH_HAS_HMM
 	depends on DEVICE_PRIVATE
 	depends on DRM_NOUVEAU
+	depends on HMM_MIRROR
 	depends on STAGING
-	select HMM_MIRROR
 	default n
 	help
 	  Say Y here if you want to enable experimental support for
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index e095a8b55dfa..64ea2fa00872 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -62,7 +62,7 @@
 #include <linux/kconfig.h>
 #include <asm/pgtable.h>
 
-#if IS_ENABLED(CONFIG_HMM)
+#ifdef CONFIG_HMM_MIRROR
 
 #include <linux/device.h>
 #include <linux/migrate.h>
@@ -324,9 +324,6 @@ static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
 	return hmm_device_entry_from_pfn(range, pfn);
 }
 
-
-
-#if IS_ENABLED(CONFIG_HMM_MIRROR)
 /*
  * Mirroring: how to synchronize device page table with CPU page table.
  *
@@ -546,13 +543,8 @@ static inline void hmm_mm_init(struct mm_struct *mm)
 {
 	mm->hmm = NULL;
 }
-#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-static inline void hmm_mm_init(struct mm_struct *mm) {}
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-
-#else /* IS_ENABLED(CONFIG_HMM) */
-static inline void hmm_mm_destroy(struct mm_struct *mm) {}
+#else /* CONFIG_HMM_MIRROR */
 static inline void hmm_mm_init(struct mm_struct *mm) {}
-#endif /* IS_ENABLED(CONFIG_HMM) */
+#endif /* CONFIG_HMM_MIRROR */
 
 #endif /* LINUX_HMM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f33a1289c101..8d37182f8dbe 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -501,7 +501,7 @@ struct mm_struct {
 #endif
 		struct work_struct async_put_work;
 
-#if IS_ENABLED(CONFIG_HMM)
+#ifdef CONFIG_HMM_MIRROR
 		/* HMM needs to track a few things per mm */
 		struct hmm *hmm;
 #endif
diff --git a/mm/Kconfig b/mm/Kconfig
index 4dbd718c8cf4..73676cb4693f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -669,37 +669,17 @@ config ZONE_DEVICE
 
 	  If FS_DAX is enabled, then say Y.
 
-config ARCH_HAS_HMM_MIRROR
-	bool
-	default y
-	depends on (X86_64 || PPC64)
-	depends on MMU && 64BIT
-
-config ARCH_HAS_HMM
-	bool
-	depends on (X86_64 || PPC64)
-	depends on ZONE_DEVICE
-	depends on MMU && 64BIT
-	depends on MEMORY_HOTPLUG
-	depends on MEMORY_HOTREMOVE
-	depends on SPARSEMEM_VMEMMAP
-	default y
-
 config MIGRATE_VMA_HELPER
 	bool
 
 config DEV_PAGEMAP_OPS
 	bool
 
-config HMM
-	bool
-	select MMU_NOTIFIER
-	select MIGRATE_VMA_HELPER
-
 config HMM_MIRROR
 	bool "HMM mirror CPU page table into a device page table"
-	depends on ARCH_HAS_HMM
-	select HMM
+	depends on MMU
+	select MMU_NOTIFIER
+	select MIGRATE_VMA_HELPER
 	help
 	  Select HMM_MIRROR if you want to mirror range of the CPU page table of a
 	  process into a device page table. Here, mirror means "keep synchronized".
@@ -721,7 +701,6 @@ config DEVICE_PUBLIC
 	bool "Addressable device memory (like GPU memory)"
 	depends on ARCH_HAS_HMM
 	depends on BROKEN
-	select HMM
 	select DEV_PAGEMAP_OPS
 
 	help
diff --git a/mm/Makefile b/mm/Makefile
index ac5e5ba78874..91c99040065c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -102,5 +102,5 @@ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
 obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
 obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
 obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
-obj-$(CONFIG_HMM) += hmm.o
+obj-$(CONFIG_HMM_MIRROR) += hmm.o
 obj-$(CONFIG_MEMFD_CREATE) += memfd.o
diff --git a/mm/hmm.c b/mm/hmm.c
index 5b2e9bb6063a..8d50c482469c 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -26,7 +26,6 @@
 #include <linux/mmu_notifier.h>
 #include <linux/memory_hotplug.h>
 
-#if IS_ENABLED(CONFIG_HMM_MIRROR)
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
 
 /**
@@ -1289,4 +1288,3 @@ long hmm_range_dma_unmap(struct hmm_range *range,
 	return cpages;
 }
 EXPORT_SYMBOL(hmm_range_dma_unmap);
-#endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* [PATCH 22/22] mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (20 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 21/22] mm: remove the HMM config option Christoph Hellwig
@ 2019-06-13  9:43 ` Christoph Hellwig
  2019-06-13 20:04   ` Jason Gunthorpe
  2019-06-14  1:53   ` [Nouveau] " John Hubbard
  2019-06-13 14:16 ` dev_pagemap related cleanups Jason Gunthorpe
  2019-06-13 18:27 ` Dan Williams
  23 siblings, 2 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13  9:43 UTC (permalink / raw)
  To: Dan Williams, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

The migrate_vma helper is only used by noveau to migrate device private
pages around.  Other HMM_MIRROR users like amdgpu or infiniband don't
need it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/gpu/drm/nouveau/Kconfig | 1 +
 mm/Kconfig                      | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index 66c839d8e9d1..96b9814e6d06 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -88,6 +88,7 @@ config DRM_NOUVEAU_SVM
 	depends on DRM_NOUVEAU
 	depends on HMM_MIRROR
 	depends on STAGING
+	select MIGRATE_VMA_HELPER
 	default n
 	help
 	  Say Y here if you want to enable experimental support for
diff --git a/mm/Kconfig b/mm/Kconfig
index 73676cb4693f..eca88679b624 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -679,7 +679,6 @@ config HMM_MIRROR
 	bool "HMM mirror CPU page table into a device page table"
 	depends on MMU
 	select MMU_NOTIFIER
-	select MIGRATE_VMA_HELPER
 	help
 	  Select HMM_MIRROR if you want to mirror range of the CPU page table of a
 	  process into a device page table. Here, mirror means "keep synchronized".
-- 
2.20.1


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (21 preceding siblings ...)
  2019-06-13  9:43 ` [PATCH 22/22] mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR Christoph Hellwig
@ 2019-06-13 14:16 ` Jason Gunthorpe
  2019-06-14  6:12   ` Christoph Hellwig
  2019-06-13 18:27 ` Dan Williams
  23 siblings, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 14:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:03AM +0200, Christoph Hellwig wrote:
> Hi Dan, Jérôme and Jason,
> 
> below is a series that cleans up the dev_pagemap interface so that
> it is more easily usable, which removes the need to wrap it in hmm
> and thus allowing to kill a lot of code

Do you want some of this to run through hmm.git? I see many patches
that don't seem to have inter-dependencies..

Thanks,
Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
                   ` (22 preceding siblings ...)
  2019-06-13 14:16 ` dev_pagemap related cleanups Jason Gunthorpe
@ 2019-06-13 18:27 ` Dan Williams
  2019-06-13 20:17   ` Logan Gunthorpe
                     ` (2 more replies)
  23 siblings, 3 replies; 103+ messages in thread
From: Dan Williams @ 2019-06-13 18:27 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jérôme Glisse, Jason Gunthorpe, Ben Skeggs, Linux MM,
	nouveau, Maling list - DRI developers, linux-nvdimm, linux-pci,
	Linux Kernel Mailing List, Andrew Morton

On Thu, Jun 13, 2019 at 2:43 AM Christoph Hellwig <hch@lst.de> wrote:
>
> Hi Dan, Jérôme and Jason,
>
> below is a series that cleans up the dev_pagemap interface so that
> it is more easily usable, which removes the need to wrap it in hmm
> and thus allowing to kill a lot of code
>
> Diffstat:
>
>  22 files changed, 245 insertions(+), 802 deletions(-)

Hooray!

> Git tree:
>
>     git://git.infradead.org/users/hch/misc.git hmm-devmem-cleanup

I just realized this collides with the dev_pagemap release rework in
Andrew's tree (commit ids below are from next.git and are not stable)

4422ee8476f0 mm/devm_memremap_pages: fix final page put race
771f0714d0dc PCI/P2PDMA: track pgmap references per resource, not globally
af37085de906 lib/genalloc: introduce chunk owners
e0047ff8aa77 PCI/P2PDMA: fix the gen_pool_add_virt() failure path
0315d47d6ae9 mm/devm_memremap_pages: introduce devm_memunmap_pages
216475c7eaa8 drivers/base/devres: introduce devm_release_action()

CONFLICT (content): Merge conflict in tools/testing/nvdimm/test/iomap.c
CONFLICT (content): Merge conflict in mm/hmm.c
CONFLICT (content): Merge conflict in kernel/memremap.c
CONFLICT (content): Merge conflict in include/linux/memremap.h
CONFLICT (content): Merge conflict in drivers/pci/p2pdma.c
CONFLICT (content): Merge conflict in drivers/nvdimm/pmem.c
CONFLICT (content): Merge conflict in drivers/dax/device.c
CONFLICT (content): Merge conflict in drivers/dax/dax-private.h

Perhaps we should pull those out and resend them through hmm.git?

It also turns out the nvdimm unit tests crash with this signature on
that branch where base v5.2-rc3 passes:

    BUG: kernel NULL pointer dereference, address: 0000000000000008
    [..]
    CPU: 15 PID: 1414 Comm: lt-libndctl Tainted: G           OE
5.2.0-rc3+ #3399
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
    RIP: 0010:percpu_ref_kill_and_confirm+0x1e/0x180
    [..]
    Call Trace:
     release_nodes+0x234/0x280
     device_release_driver_internal+0xe8/0x1b0
     bus_remove_device+0xf2/0x160
     device_del+0x166/0x370
     unregister_dev_dax+0x23/0x50
     release_nodes+0x234/0x280
     device_release_driver_internal+0xe8/0x1b0
     unbind_store+0x94/0x120
     kernfs_fop_write+0xf0/0x1a0
     vfs_write+0xb7/0x1b0
     ksys_write+0x5c/0xd0
     do_syscall_64+0x60/0x240

The crash bisects to:

    d8cc8bbe108c device-dax: use the dev_pagemap internal refcount

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 01/22] mm: remove the unused ARCH_HAS_HMM_DEVICE Kconfig option
  2019-06-13  9:43 ` [PATCH 01/22] mm: remove the unused ARCH_HAS_HMM_DEVICE Kconfig option Christoph Hellwig
@ 2019-06-13 18:30   ` Jason Gunthorpe
  0 siblings, 0 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 18:30 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:04AM +0200, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  mm/Kconfig | 10 ----------
>  1 file changed, 10 deletions(-)

So long as we are willing to run a hmm.git we can merge only complete
features going forward.

Thus we don't need the crazy process described in the commit message
that (deliberately) introduced this unused kconfig.

I also tried to find something in-flight for 5.3 that would need this
and found nothing

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 02/22] mm: remove the struct hmm_device infrastructure
  2019-06-13  9:43 ` [PATCH 02/22] mm: remove the struct hmm_device infrastructure Christoph Hellwig
@ 2019-06-13 18:46   ` Jason Gunthorpe
  2019-06-13 23:06   ` [Nouveau] " John Hubbard
  1 sibling, 0 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 18:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:05AM +0200, Christoph Hellwig wrote:
> This code is a trivial wrapper around device model helpers, which
> should have been integrated into the driver device model usage from
> the start.  Assuming it actually had users, which it never had since
> the code was added more than 1 1/2 years ago.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  include/linux/hmm.h | 20 ------------
>  mm/hmm.c            | 80 ---------------------------------------------
>  2 files changed, 100 deletions(-)

I haven't looked in detail at this device memory stuff.. But I did
check a bit through the mailing list archives for some clue what this
was supposed to be for (wow, this is from 2016!)

The commit that added this says:
  This introduce a dummy HMM device class so device driver can use it to
  create hmm_device for the sole purpose of registering device memory.

Which I just can't understand at all. 

If we need a 'struct device' for some 'device memory' purpose then it
probably ought to be the 'struct pci_device' holding the BAR, not a
fake device.

I also can't comprehend why a supposed fake device would need a
chardev, with a stanadrd 'hmm_deviceX' name, without also defining a
core kernel ABI for that char dev..

If this comes back it needs a proper explanation and review, with a
user.

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 03/22] mm: remove hmm_devmem_add_resource
  2019-06-13  9:43 ` [PATCH 03/22] mm: remove hmm_devmem_add_resource Christoph Hellwig
@ 2019-06-13 18:52   ` Jason Gunthorpe
  2019-06-14  6:19     ` Christoph Hellwig
  2019-06-14  0:54   ` [Nouveau] " John Hubbard
  2019-06-20 19:32   ` Michal Hocko
  2 siblings, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 18:52 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:06AM +0200, Christoph Hellwig wrote:
> This function has never been used since it was first added to the kernel
> more than a year and a half ago, and if we ever grow a consumer of the
> MEMORY_DEVICE_PUBLIC infrastructure it can easily use devm_memremap_pages
> directly now that we've simplified the API for it.

nit: Have we simplified the interface for devm_memremap_pages() at
this point, or are you talking about later patches in this series.

I checked this and all the called functions are exported symbols, so
there is no blocker for a future driver to call devm_memremap_pages(),
maybe even with all this boiler plate, in future.

If we eventually get many users that need some simplified registration
then we should add a devm_memremap_pages_simplified() interface and
factor out that code when we can see the pattern.

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free
  2019-06-13  9:43 ` [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free Christoph Hellwig
@ 2019-06-13 19:05   ` Jason Gunthorpe
  2019-06-14  6:21     ` Christoph Hellwig
  2019-06-14  1:46   ` John Hubbard
  2019-06-20 19:36   ` Michal Hocko
  2 siblings, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:07AM +0200, Christoph Hellwig wrote:
> ->mapping isn't even used by HMM users, and the field at the same offset
> in the zone_device part of the union is declared as pad.  (Which btw is
> rather confusing, as DAX uses ->pgmap and ->mapping from two different
> sides of the union, but DAX doesn't use hmm_devmem_free).
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  mm/hmm.c | 2 --
>  1 file changed, 2 deletions(-)

Hurm, is hmm following this comment from mm_types.h?

 * If you allocate the page using alloc_pages(), you can use some of the
 * space in struct page for your own purposes.  The five words in the main
 * union are available, except for bit 0 of the first word which must be
 * kept clear.  Many users use this word to store a pointer to an object
 * which is guaranteed to be aligned.  If you use the same storage as
 * page->mapping, you must restore it to NULL before freeing the page.

Maybe the assumption was that a driver is using ->mapping ?

However, nouveau is the only driver that uses this path, and it never
touches page->mapping either (nor in -next).

It looks like if a driver were to start using mapping then the driver
should be responsible to set it back to NULL before being done with
the page.

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper
  2019-06-13  9:43 ` [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper Christoph Hellwig
@ 2019-06-13 19:16   ` Jason Gunthorpe
  2019-06-14  6:24     ` Christoph Hellwig
  2019-06-15  2:21   ` John Hubbard
  1 sibling, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:09AM +0200, Christoph Hellwig wrote:
> Keep the physical address allocation that hmm_add_device does with the
> rest of the resource code, and allow future reuse of it without the hmm
> wrapper.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
>  include/linux/ioport.h |  2 ++
>  kernel/resource.c      | 39 +++++++++++++++++++++++++++++++++++++++
>  mm/hmm.c               | 33 ++++-----------------------------
>  3 files changed, 45 insertions(+), 29 deletions(-)
> 
> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
> index da0ebaec25f0..76a33ae3bf6c 100644
> +++ b/include/linux/ioport.h
> @@ -286,6 +286,8 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
>         return (r1->start <= r2->end && r1->end >= r2->start);
>  }
>  
> +struct resource *devm_request_free_mem_region(struct device *dev,
> +		struct resource *base, unsigned long size);
>  
>  #endif /* __ASSEMBLY__ */
>  #endif	/* _LINUX_IOPORT_H */
> diff --git a/kernel/resource.c b/kernel/resource.c
> index 158f04ec1d4f..99c58134ed1c 100644
> +++ b/kernel/resource.c
> @@ -1628,6 +1628,45 @@ void resource_list_free(struct list_head *head)
>  }
>  EXPORT_SYMBOL(resource_list_free);
>  
> +#ifdef CONFIG_DEVICE_PRIVATE
> +/**
> + * devm_request_free_mem_region - find free region for device private memory
> + *
> + * @dev: device struct to bind the resource too
> + * @size: size in bytes of the device memory to add
> + * @base: resource tree to look in
> + *
> + * This function tries to find an empty range of physical address big enough to
> + * contain the new resource, so that it can later be hotpluged as ZONE_DEVICE
> + * memory, which in turn allocates struct pages.
> + */
> +struct resource *devm_request_free_mem_region(struct device *dev,
> +		struct resource *base, unsigned long size)
> +{
> +	resource_size_t end, addr;
> +	struct resource *res;
> +
> +	size = ALIGN(size, 1UL << PA_SECTION_SHIFT);
> +	end = min_t(unsigned long, base->end, (1UL << MAX_PHYSMEM_BITS) - 1);

Even fixed it to use min_t

> +	addr = end - size + 1UL;
> +	for (; addr > size && addr >= base->start; addr -= size) {
> +		if (region_intersects(addr, size, 0, IORES_DESC_NONE) !=
> +				REGION_DISJOINT)
> +			continue;

The FIXME about the algorithm cost seems justified though, yikes.

> +
> +		res = devm_request_mem_region(dev, addr, size, dev_name(dev));
> +		if (!res)
> +			return ERR_PTR(-ENOMEM);
> +		res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;

I wonder if IORES_DESC_DEVICE_PRIVATE_MEMORY should be a function
argument?

Not really any substantive remark, so

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 07/22] memremap: move dev_pagemap callbacks into a separate structure
  2019-06-13  9:43 ` [PATCH 07/22] memremap: move dev_pagemap callbacks into a separate structure Christoph Hellwig
@ 2019-06-13 19:18   ` Jason Gunthorpe
  2019-06-13 20:14   ` Logan Gunthorpe
  1 sibling, 0 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:18 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:10AM +0200, Christoph Hellwig wrote:
> The dev_pagemap is a growing too many callbacks.  Move them into a
> separate ops structure so that they are not duplicated for multiple
> instances, and an attacker can't easily overwrite them.

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 08/22] memremap: pass a struct dev_pagemap to ->kill
  2019-06-13  9:43 ` [PATCH 08/22] memremap: pass a struct dev_pagemap to ->kill Christoph Hellwig
@ 2019-06-13 19:26   ` Jason Gunthorpe
  2019-06-13 20:12   ` Logan Gunthorpe
  1 sibling, 0 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:26 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:11AM +0200, Christoph Hellwig wrote:
> Passing the actual typed structure leads to more understandable code
> vs the actual references.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
>  drivers/dax/device.c              | 7 +++----
>  drivers/nvdimm/pmem.c             | 6 +++---
>  drivers/pci/p2pdma.c              | 6 +++---
>  include/linux/memremap.h          | 2 +-
>  kernel/memremap.c                 | 4 ++--
>  mm/hmm.c                          | 4 ++--
>  tools/testing/nvdimm/test/iomap.c | 6 ++----
>  7 files changed, 16 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index 4adab774dade..e23fa1bd8c97 100644
> +++ b/drivers/dax/device.c
> @@ -37,13 +37,12 @@ static void dev_dax_percpu_exit(void *data)
>  	percpu_ref_exit(ref);
>  }
>  
> -static void dev_dax_percpu_kill(struct percpu_ref *data)
> +static void dev_dax_percpu_kill(struct dev_pagemap *pgmap)
>  {

Looks like it was always like this, but I also can't see a reason to
use the percpu as a handle for a dev_pagemap callback.

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 09/22] memremap: lift the devmap_enable manipulation into devm_memremap_pages
  2019-06-13  9:43 ` [PATCH 09/22] memremap: lift the devmap_enable manipulation into devm_memremap_pages Christoph Hellwig
@ 2019-06-13 19:34   ` Jason Gunthorpe
  2019-06-13 20:13     ` Dan Williams
  2019-06-14  6:28     ` Christoph Hellwig
  0 siblings, 2 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:34 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:12AM +0200, Christoph Hellwig wrote:
> Just check if there is a ->page_free operation set and take care of the
> static key enable, as well as the put using device managed resources.
> diff --git a/mm/hmm.c b/mm/hmm.c
> index c76a1b5defda..6dc769feb2e1 100644
> +++ b/mm/hmm.c
> @@ -1378,8 +1378,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
>  	void *result;
>  	int ret;
>  
> -	dev_pagemap_get_ops();
> -

Where was the matching dev_pagemap_put_ops() for this hmm case? This
is a bug fix too?

The nouveau driver is the only one to actually call this hmm function
and it does it as part of a probe function. 

Seems reasonable, however, in the unlikely event that it fails to init
'dmem' the driver will retain a dev_pagemap_get_ops until it unloads.
This imbalance doesn't seem worth worrying about.

Reviewed-by: Christoph Hellwig <hch@lst.de>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 11/22] memremap: remove the data field in struct dev_pagemap
  2019-06-13  9:43 ` [PATCH 11/22] memremap: remove the data field in struct dev_pagemap Christoph Hellwig
@ 2019-06-13 19:37   ` Jason Gunthorpe
  0 siblings, 0 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:37 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:14AM +0200, Christoph Hellwig wrote:
> struct dev_pagemap is always embedded into a containing structure, so
> there is no need to an additional private data field.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/nvdimm/pmem.c    | 2 +-
>  include/linux/memremap.h | 3 +--
>  kernel/memremap.c        | 2 +-
>  mm/hmm.c                 | 9 +++++----
>  4 files changed, 8 insertions(+), 8 deletions(-)

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 14/22] nouveau: use alloc_page_vma directly
  2019-06-13  9:43 ` [PATCH 14/22] nouveau: use alloc_page_vma directly Christoph Hellwig
@ 2019-06-13 19:39   ` Jason Gunthorpe
  0 siblings, 0 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:39 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:17AM +0200, Christoph Hellwig wrote:
> hmm_vma_alloc_locked_page is scheduled to go away, use the proper
> mm function directly.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/gpu/drm/nouveau/nouveau_dmem.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 17/22] mm: remove hmm_devmem_add
  2019-06-13  9:43 ` [PATCH 17/22] mm: remove hmm_devmem_add Christoph Hellwig
@ 2019-06-13 19:42   ` Jason Gunthorpe
  2019-06-14  6:39     ` Christoph Hellwig
  0 siblings, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:42 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:20AM +0200, Christoph Hellwig wrote:
> There isn't really much value add in the hmm_devmem_add wrapper.  Just
> factor out a little helper to find the resource, and otherwise let the
> driver implement the dev_pagemap_ops directly.

Was this commit message written when other patches were squashed in
here? I think the helper this mentions was from an earlier patch

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  Documentation/vm/hmm.rst |  26 --------
>  include/linux/hmm.h      | 129 ---------------------------------------
>  mm/hmm.c                 | 115 ----------------------------------
>  3 files changed, 270 deletions(-)

I looked for in-flight patches that might be using these APIs and
found nothing. To be sent patches can use the new API with no loss in
functionality...

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-13  9:43 ` [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken Christoph Hellwig
@ 2019-06-13 19:44   ` Jason Gunthorpe
  2019-06-13 19:53     ` Ralph Campbell
  2019-06-20 19:26   ` Michal Hocko
  1 sibling, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:21AM +0200, Christoph Hellwig wrote:
> The code hasn't been used since it was added to the tree, and doesn't
> appear to actually be usable.  Mark it as BROKEN until either a user
> comes along or we finally give up on it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
>  mm/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 0d2ba7e1f43e..406fa45e9ecc 100644
> +++ b/mm/Kconfig
> @@ -721,6 +721,7 @@ config DEVICE_PRIVATE
>  config DEVICE_PUBLIC
>  	bool "Addressable device memory (like GPU memory)"
>  	depends on ARCH_HAS_HMM
> +	depends on BROKEN
>  	select HMM
>  	select DEV_PAGEMAP_OPS

This seems a bit harsh, we do have another kconfig that selects this
one today:

config DRM_NOUVEAU_SVM
        bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
        depends on ARCH_HAS_HMM
        depends on DRM_NOUVEAU
        depends on STAGING
        select HMM_MIRROR
        select DEVICE_PRIVATE
        default n
        help
          Say Y here if you want to enable experimental support for
          Shared Virtual Memory (SVM).

Maybe it should be depends on STAGING not broken?

or maybe nouveau_svm doesn't actually need DEVICE_PRIVATE?

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-13 19:44   ` Jason Gunthorpe
@ 2019-06-13 19:53     ` Ralph Campbell
  2019-06-13 19:58       ` Jason Gunthorpe
  0 siblings, 1 reply; 103+ messages in thread
From: Ralph Campbell @ 2019-06-13 19:53 UTC (permalink / raw)
  To: Jason Gunthorpe, Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel


On 6/13/19 12:44 PM, Jason Gunthorpe wrote:
> On Thu, Jun 13, 2019 at 11:43:21AM +0200, Christoph Hellwig wrote:
>> The code hasn't been used since it was added to the tree, and doesn't
>> appear to actually be usable.  Mark it as BROKEN until either a user
>> comes along or we finally give up on it.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>>   mm/Kconfig | 1 +
>>   1 file changed, 1 insertion(+)
>>
>> diff --git a/mm/Kconfig b/mm/Kconfig
>> index 0d2ba7e1f43e..406fa45e9ecc 100644
>> +++ b/mm/Kconfig
>> @@ -721,6 +721,7 @@ config DEVICE_PRIVATE
>>   config DEVICE_PUBLIC
>>   	bool "Addressable device memory (like GPU memory)"
>>   	depends on ARCH_HAS_HMM
>> +	depends on BROKEN
>>   	select HMM
>>   	select DEV_PAGEMAP_OPS
> 
> This seems a bit harsh, we do have another kconfig that selects this
> one today:
> 
> config DRM_NOUVEAU_SVM
>          bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
>          depends on ARCH_HAS_HMM
>          depends on DRM_NOUVEAU
>          depends on STAGING
>          select HMM_MIRROR
>          select DEVICE_PRIVATE
>          default n
>          help
>            Say Y here if you want to enable experimental support for
>            Shared Virtual Memory (SVM).
> 
> Maybe it should be depends on STAGING not broken?
> 
> or maybe nouveau_svm doesn't actually need DEVICE_PRIVATE?
> 
> Jason

I think you are confusing DEVICE_PRIVATE for DEVICE_PUBLIC.
DRM_NOUVEAU_SVM does use DEVICE_PRIVATE but not DEVICE_PUBLIC.


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 20/22] mm: sort out the DEVICE_PRIVATE Kconfig mess
  2019-06-13  9:43 ` [PATCH 20/22] mm: sort out the DEVICE_PRIVATE Kconfig mess Christoph Hellwig
@ 2019-06-13 19:55   ` Jason Gunthorpe
  0 siblings, 0 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:55 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:23AM +0200, Christoph Hellwig wrote:
> The ZONE_DEVICE support doesn't depend on anything HMM related, just on
> various bits of arch support as indicated by the architecture.  Also
> don't select the option from nouveau as it isn't present in many setups,
> and depend on it instead.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
>  drivers/gpu/drm/nouveau/Kconfig | 2 +-
>  mm/Kconfig                      | 5 ++---
>  2 files changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
> index dba2613f7180..6303d203ab1d 100644
> +++ b/drivers/gpu/drm/nouveau/Kconfig
> @@ -85,10 +85,10 @@ config DRM_NOUVEAU_BACKLIGHT
>  config DRM_NOUVEAU_SVM
>  	bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
>  	depends on ARCH_HAS_HMM
> +	depends on DEVICE_PRIVATE
>  	depends on DRM_NOUVEAU
>  	depends on STAGING
>  	select HMM_MIRROR
> -	select DEVICE_PRIVATE
>  	default n
>  	help
>  	  Say Y here if you want to enable experimental support for

Ben, I heard you might have a patch like this in your tree, but I
don't think I could find your tree?? 

Do you have any nouveau/Kconfig patches that might conflict? Thanks

Does this fix the randconfigs failures that have been reported?

> diff --git a/mm/Kconfig b/mm/Kconfig
> index 406fa45e9ecc..4dbd718c8cf4 100644
> +++ b/mm/Kconfig
> @@ -677,13 +677,13 @@ config ARCH_HAS_HMM_MIRROR
>  
>  config ARCH_HAS_HMM
>  	bool
> -	default y
>  	depends on (X86_64 || PPC64)
>  	depends on ZONE_DEVICE
>  	depends on MMU && 64BIT
>  	depends on MEMORY_HOTPLUG
>  	depends on MEMORY_HOTREMOVE
>  	depends on SPARSEMEM_VMEMMAP
> +	default y

What is the reason we have this ARCH thing anyhow? Does hmm have arch
dependencies someplace?

Thanks
Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-13 19:53     ` Ralph Campbell
@ 2019-06-13 19:58       ` Jason Gunthorpe
  2019-06-14  0:43         ` Ira Weiny
  0 siblings, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 19:58 UTC (permalink / raw)
  To: Ralph Campbell
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 12:53:02PM -0700, Ralph Campbell wrote:
> 
> On 6/13/19 12:44 PM, Jason Gunthorpe wrote:
> > On Thu, Jun 13, 2019 at 11:43:21AM +0200, Christoph Hellwig wrote:
> > > The code hasn't been used since it was added to the tree, and doesn't
> > > appear to actually be usable.  Mark it as BROKEN until either a user
> > > comes along or we finally give up on it.
> > > 
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > >   mm/Kconfig | 1 +
> > >   1 file changed, 1 insertion(+)
> > > 
> > > diff --git a/mm/Kconfig b/mm/Kconfig
> > > index 0d2ba7e1f43e..406fa45e9ecc 100644
> > > +++ b/mm/Kconfig
> > > @@ -721,6 +721,7 @@ config DEVICE_PRIVATE
> > >   config DEVICE_PUBLIC
> > >   	bool "Addressable device memory (like GPU memory)"
> > >   	depends on ARCH_HAS_HMM
> > > +	depends on BROKEN
> > >   	select HMM
> > >   	select DEV_PAGEMAP_OPS
> > 
> > This seems a bit harsh, we do have another kconfig that selects this
> > one today:
> > 
> > config DRM_NOUVEAU_SVM
> >          bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
> >          depends on ARCH_HAS_HMM
> >          depends on DRM_NOUVEAU
> >          depends on STAGING
> >          select HMM_MIRROR
> >          select DEVICE_PRIVATE
> >          default n
> >          help
> >            Say Y here if you want to enable experimental support for
> >            Shared Virtual Memory (SVM).
> > 
> > Maybe it should be depends on STAGING not broken?
> > 
> > or maybe nouveau_svm doesn't actually need DEVICE_PRIVATE?
> > 
> > Jason
> 
> I think you are confusing DEVICE_PRIVATE for DEVICE_PUBLIC.
> DRM_NOUVEAU_SVM does use DEVICE_PRIVATE but not DEVICE_PUBLIC.

Indeed you are correct, never mind

Hum, so the only thing this config does is short circuit here:

static inline bool is_device_public_page(const struct page *page)
{
        return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
                IS_ENABLED(CONFIG_DEVICE_PUBLIC) &&
                is_zone_device_page(page) &&
                page->pgmap->type == MEMORY_DEVICE_PUBLIC;
}

Which is called all over the place.. 

So, yes, we really don't want any distro or something to turn this on
until it has a use.

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 21/22] mm: remove the HMM config option
  2019-06-13  9:43 ` [PATCH 21/22] mm: remove the HMM config option Christoph Hellwig
@ 2019-06-13 20:01   ` Jason Gunthorpe
  2019-06-14  6:47     ` Christoph Hellwig
  0 siblings, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 20:01 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:24AM +0200, Christoph Hellwig wrote:
> All the mm/hmm.c code is better keyed off HMM_MIRROR.  Also let nouveau
> depend on it instead of the mix of a dummy dependency symbol plus the
> actually selected one.  Drop various odd dependencies, as the code is
> pretty portable.

I don't really know, but I thought this needed the arch restriction
for the same reason get_user_pages has various unique arch specific
implementations (it does seem to have some open coded GUP like thing)?

I was hoping we could do this after your common gup series? But sooner
is better too.

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 22/22] mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR
  2019-06-13  9:43 ` [PATCH 22/22] mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR Christoph Hellwig
@ 2019-06-13 20:04   ` Jason Gunthorpe
  2019-06-14  1:53   ` [Nouveau] " John Hubbard
  1 sibling, 0 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 20:04 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 11:43:25AM +0200, Christoph Hellwig wrote:
> The migrate_vma helper is only used by noveau to migrate device private
> pages around.  Other HMM_MIRROR users like amdgpu or infiniband don't
> need it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/gpu/drm/nouveau/Kconfig | 1 +
>  mm/Kconfig                      | 1 -
>  2 files changed, 1 insertion(+), 1 deletion(-)

Yes, the thing that calls migrate_vma() should be the thing that has
the kconfig stuff.

Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 08/22] memremap: pass a struct dev_pagemap to ->kill
  2019-06-13  9:43 ` [PATCH 08/22] memremap: pass a struct dev_pagemap to ->kill Christoph Hellwig
  2019-06-13 19:26   ` Jason Gunthorpe
@ 2019-06-13 20:12   ` Logan Gunthorpe
  2019-06-13 20:15     ` Dan Williams
  1 sibling, 1 reply; 103+ messages in thread
From: Logan Gunthorpe @ 2019-06-13 20:12 UTC (permalink / raw)
  To: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs
  Cc: linux-nvdimm, linux-pci, linux-kernel, dri-devel, linux-mm, nouveau



On 2019-06-13 3:43 a.m., Christoph Hellwig wrote:
> Passing the actual typed structure leads to more understandable code
> vs the actual references.

Ha, ok, I originally suggested this to Dan when he introduced the
callback[1].

Reviewed-by: Logan Gunthorpe <logang@deltatee.com>

Logan

[1]
https://lore.kernel.org/lkml/8f0cae82-130f-8a64-cfbd-fda5fd76bb79@deltatee.com/T/#u


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 09/22] memremap: lift the devmap_enable manipulation into devm_memremap_pages
  2019-06-13 19:34   ` Jason Gunthorpe
@ 2019-06-13 20:13     ` Dan Williams
  2019-06-14  6:28     ` Christoph Hellwig
  1 sibling, 0 replies; 103+ messages in thread
From: Dan Williams @ 2019-06-13 20:13 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Jérôme Glisse, Ben Skeggs, linux-mm,
	nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 12:35 PM Jason Gunthorpe <jgg@mellanox.com> wrote:
>
> On Thu, Jun 13, 2019 at 11:43:12AM +0200, Christoph Hellwig wrote:
> > Just check if there is a ->page_free operation set and take care of the
> > static key enable, as well as the put using device managed resources.
> > diff --git a/mm/hmm.c b/mm/hmm.c
> > index c76a1b5defda..6dc769feb2e1 100644
> > +++ b/mm/hmm.c
> > @@ -1378,8 +1378,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
> >       void *result;
> >       int ret;
> >
> > -     dev_pagemap_get_ops();
> > -
>
> Where was the matching dev_pagemap_put_ops() for this hmm case? This
> is a bug fix too?
>

It never existed. HMM turned on the facility and made everyone's
put_page() operations slower regardless of whether HMM was in active
use.

> The nouveau driver is the only one to actually call this hmm function
> and it does it as part of a probe function.
>
> Seems reasonable, however, in the unlikely event that it fails to init
> 'dmem' the driver will retain a dev_pagemap_get_ops until it unloads.
> This imbalance doesn't seem worth worrying about.

Right, unless/until the overhead of checking for put_page() callbacks
starts to hurt leaving pagemap_ops tied to lifetime of the driver load
seems acceptable because who unbinds their GPU device at runtime? On
the other hand it was simple enough for the pmem driver to drop the
reference each time a device was unbound just to close the loop.

>
> Reviewed-by: Christoph Hellwig <hch@lst.de>

...minor typo.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 07/22] memremap: move dev_pagemap callbacks into a separate structure
  2019-06-13  9:43 ` [PATCH 07/22] memremap: move dev_pagemap callbacks into a separate structure Christoph Hellwig
  2019-06-13 19:18   ` Jason Gunthorpe
@ 2019-06-13 20:14   ` Logan Gunthorpe
  1 sibling, 0 replies; 103+ messages in thread
From: Logan Gunthorpe @ 2019-06-13 20:14 UTC (permalink / raw)
  To: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs
  Cc: linux-nvdimm, linux-pci, linux-kernel, dri-devel, linux-mm, nouveau



On 2019-06-13 3:43 a.m., Christoph Hellwig wrote:
> The dev_pagemap is a growing too many callbacks.  Move them into a
> separate ops structure so that they are not duplicated for multiple
> instances, and an attacker can't easily overwrite them.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/dax/device.c              |  6 +++++-
>  drivers/nvdimm/pmem.c             | 18 +++++++++++++-----
>  drivers/pci/p2pdma.c              |  5 ++++-
>  include/linux/memremap.h          | 29 +++++++++++++++--------------
>  kernel/memremap.c                 | 12 ++++++------
>  mm/hmm.c                          |  8 ++++++--
>  tools/testing/nvdimm/test/iomap.c |  2 +-
>  7 files changed, 50 insertions(+), 30 deletions(-)

Looks good to me,

Reviewed-by: Logan Gunthorpe <logang@deltatee.com>

Logan

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 08/22] memremap: pass a struct dev_pagemap to ->kill
  2019-06-13 20:12   ` Logan Gunthorpe
@ 2019-06-13 20:15     ` Dan Williams
  0 siblings, 0 replies; 103+ messages in thread
From: Dan Williams @ 2019-06-13 20:15 UTC (permalink / raw)
  To: Logan Gunthorpe
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, linux-nvdimm, linux-pci, Linux Kernel Mailing List,
	Maling list - DRI developers, Linux MM, nouveau

On Thu, Jun 13, 2019 at 1:12 PM Logan Gunthorpe <logang@deltatee.com> wrote:
>
>
>
> On 2019-06-13 3:43 a.m., Christoph Hellwig wrote:
> > Passing the actual typed structure leads to more understandable code
> > vs the actual references.
>
> Ha, ok, I originally suggested this to Dan when he introduced the
> callback[1].
>
> Reviewed-by: Logan Gunthorpe <logang@deltatee.com>

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reported-by: Logan Gunthorpe <logang@deltatee.com> :)


>
> Logan
>
> [1]
> https://lore.kernel.org/lkml/8f0cae82-130f-8a64-cfbd-fda5fd76bb79@deltatee.com/T/#u
>

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 18:27 ` Dan Williams
@ 2019-06-13 20:17   ` Logan Gunthorpe
  2019-06-13 20:21     ` Dan Williams
  2019-06-13 20:40   ` Jason Gunthorpe
  2019-06-14  6:13   ` Christoph Hellwig
  2 siblings, 1 reply; 103+ messages in thread
From: Logan Gunthorpe @ 2019-06-13 20:17 UTC (permalink / raw)
  To: Dan Williams, Christoph Hellwig
  Cc: linux-nvdimm, nouveau, Linux Kernel Mailing List,
	Maling list - DRI developers, Linux MM, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs, linux-pci, Andrew Morton



On 2019-06-13 12:27 p.m., Dan Williams wrote:
> On Thu, Jun 13, 2019 at 2:43 AM Christoph Hellwig <hch@lst.de> wrote:
>>
>> Hi Dan, Jérôme and Jason,
>>
>> below is a series that cleans up the dev_pagemap interface so that
>> it is more easily usable, which removes the need to wrap it in hmm
>> and thus allowing to kill a lot of code
>>
>> Diffstat:
>>
>>  22 files changed, 245 insertions(+), 802 deletions(-)
> 
> Hooray!
> 
>> Git tree:
>>
>>     git://git.infradead.org/users/hch/misc.git hmm-devmem-cleanup
> 
> I just realized this collides with the dev_pagemap release rework in
> Andrew's tree (commit ids below are from next.git and are not stable)
> 
> 4422ee8476f0 mm/devm_memremap_pages: fix final page put race
> 771f0714d0dc PCI/P2PDMA: track pgmap references per resource, not globally
> af37085de906 lib/genalloc: introduce chunk owners
> e0047ff8aa77 PCI/P2PDMA: fix the gen_pool_add_virt() failure path
> 0315d47d6ae9 mm/devm_memremap_pages: introduce devm_memunmap_pages
> 216475c7eaa8 drivers/base/devres: introduce devm_release_action()
> 
> CONFLICT (content): Merge conflict in tools/testing/nvdimm/test/iomap.c
> CONFLICT (content): Merge conflict in mm/hmm.c
> CONFLICT (content): Merge conflict in kernel/memremap.c
> CONFLICT (content): Merge conflict in include/linux/memremap.h
> CONFLICT (content): Merge conflict in drivers/pci/p2pdma.c
> CONFLICT (content): Merge conflict in drivers/nvdimm/pmem.c
> CONFLICT (content): Merge conflict in drivers/dax/device.c
> CONFLICT (content): Merge conflict in drivers/dax/dax-private.h
> 
> Perhaps we should pull those out and resend them through hmm.git?

Hmm, I've been waiting for those patches to get in for a little while now ;(

Logan

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 20:17   ` Logan Gunthorpe
@ 2019-06-13 20:21     ` Dan Williams
  2019-06-13 20:24       ` Logan Gunthorpe
  0 siblings, 1 reply; 103+ messages in thread
From: Dan Williams @ 2019-06-13 20:21 UTC (permalink / raw)
  To: Logan Gunthorpe
  Cc: Christoph Hellwig, linux-nvdimm, nouveau,
	Linux Kernel Mailing List, Maling list - DRI developers,
	Linux MM, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs,
	linux-pci, Andrew Morton

On Thu, Jun 13, 2019 at 1:18 PM Logan Gunthorpe <logang@deltatee.com> wrote:
>
>
>
> On 2019-06-13 12:27 p.m., Dan Williams wrote:
> > On Thu, Jun 13, 2019 at 2:43 AM Christoph Hellwig <hch@lst.de> wrote:
> >>
> >> Hi Dan, Jérôme and Jason,
> >>
> >> below is a series that cleans up the dev_pagemap interface so that
> >> it is more easily usable, which removes the need to wrap it in hmm
> >> and thus allowing to kill a lot of code
> >>
> >> Diffstat:
> >>
> >>  22 files changed, 245 insertions(+), 802 deletions(-)
> >
> > Hooray!
> >
> >> Git tree:
> >>
> >>     git://git.infradead.org/users/hch/misc.git hmm-devmem-cleanup
> >
> > I just realized this collides with the dev_pagemap release rework in
> > Andrew's tree (commit ids below are from next.git and are not stable)
> >
> > 4422ee8476f0 mm/devm_memremap_pages: fix final page put race
> > 771f0714d0dc PCI/P2PDMA: track pgmap references per resource, not globally
> > af37085de906 lib/genalloc: introduce chunk owners
> > e0047ff8aa77 PCI/P2PDMA: fix the gen_pool_add_virt() failure path
> > 0315d47d6ae9 mm/devm_memremap_pages: introduce devm_memunmap_pages
> > 216475c7eaa8 drivers/base/devres: introduce devm_release_action()
> >
> > CONFLICT (content): Merge conflict in tools/testing/nvdimm/test/iomap.c
> > CONFLICT (content): Merge conflict in mm/hmm.c
> > CONFLICT (content): Merge conflict in kernel/memremap.c
> > CONFLICT (content): Merge conflict in include/linux/memremap.h
> > CONFLICT (content): Merge conflict in drivers/pci/p2pdma.c
> > CONFLICT (content): Merge conflict in drivers/nvdimm/pmem.c
> > CONFLICT (content): Merge conflict in drivers/dax/device.c
> > CONFLICT (content): Merge conflict in drivers/dax/dax-private.h
> >
> > Perhaps we should pull those out and resend them through hmm.git?
>
> Hmm, I've been waiting for those patches to get in for a little while now ;(

Unless Andrew was going to submit as v5.2-rc fixes I think I should
rebase / submit them on current hmm.git and then throw these cleanups
from Christoph on top?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 20:21     ` Dan Williams
@ 2019-06-13 20:24       ` Logan Gunthorpe
  2019-06-13 20:48         ` Andrew Morton
  0 siblings, 1 reply; 103+ messages in thread
From: Logan Gunthorpe @ 2019-06-13 20:24 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, linux-nvdimm, nouveau,
	Linux Kernel Mailing List, Maling list - DRI developers,
	Linux MM, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs,
	linux-pci, Andrew Morton



On 2019-06-13 2:21 p.m., Dan Williams wrote:
> On Thu, Jun 13, 2019 at 1:18 PM Logan Gunthorpe <logang@deltatee.com> wrote:
>>
>>
>>
>> On 2019-06-13 12:27 p.m., Dan Williams wrote:
>>> On Thu, Jun 13, 2019 at 2:43 AM Christoph Hellwig <hch@lst.de> wrote:
>>>>
>>>> Hi Dan, Jérôme and Jason,
>>>>
>>>> below is a series that cleans up the dev_pagemap interface so that
>>>> it is more easily usable, which removes the need to wrap it in hmm
>>>> and thus allowing to kill a lot of code
>>>>
>>>> Diffstat:
>>>>
>>>>  22 files changed, 245 insertions(+), 802 deletions(-)
>>>
>>> Hooray!
>>>
>>>> Git tree:
>>>>
>>>>     git://git.infradead.org/users/hch/misc.git hmm-devmem-cleanup
>>>
>>> I just realized this collides with the dev_pagemap release rework in
>>> Andrew's tree (commit ids below are from next.git and are not stable)
>>>
>>> 4422ee8476f0 mm/devm_memremap_pages: fix final page put race
>>> 771f0714d0dc PCI/P2PDMA: track pgmap references per resource, not globally
>>> af37085de906 lib/genalloc: introduce chunk owners
>>> e0047ff8aa77 PCI/P2PDMA: fix the gen_pool_add_virt() failure path
>>> 0315d47d6ae9 mm/devm_memremap_pages: introduce devm_memunmap_pages
>>> 216475c7eaa8 drivers/base/devres: introduce devm_release_action()
>>>
>>> CONFLICT (content): Merge conflict in tools/testing/nvdimm/test/iomap.c
>>> CONFLICT (content): Merge conflict in mm/hmm.c
>>> CONFLICT (content): Merge conflict in kernel/memremap.c
>>> CONFLICT (content): Merge conflict in include/linux/memremap.h
>>> CONFLICT (content): Merge conflict in drivers/pci/p2pdma.c
>>> CONFLICT (content): Merge conflict in drivers/nvdimm/pmem.c
>>> CONFLICT (content): Merge conflict in drivers/dax/device.c
>>> CONFLICT (content): Merge conflict in drivers/dax/dax-private.h
>>>
>>> Perhaps we should pull those out and resend them through hmm.git?
>>
>> Hmm, I've been waiting for those patches to get in for a little while now ;(
> 
> Unless Andrew was going to submit as v5.2-rc fixes I think I should
> rebase / submit them on current hmm.git and then throw these cleanups
> from Christoph on top?

Whatever you feel is best. I'm just hoping they get in sooner rather
than later. They do fix a bug after all. Let me know if you want me to
retest the P2PDMA stuff after the rebase.

Thanks,

Logan

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 18:27 ` Dan Williams
  2019-06-13 20:17   ` Logan Gunthorpe
@ 2019-06-13 20:40   ` Jason Gunthorpe
  2019-06-13 21:21     ` Christoph Hellwig
  2019-06-14  0:31     ` Ira Weiny
  2019-06-14  6:13   ` Christoph Hellwig
  2 siblings, 2 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 20:40 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Jérôme Glisse, Ben Skeggs, Linux MM,
	nouveau, Maling list - DRI developers, linux-nvdimm, linux-pci,
	Linux Kernel Mailing List, Andrew Morton

On Thu, Jun 13, 2019 at 11:27:39AM -0700, Dan Williams wrote:
> On Thu, Jun 13, 2019 at 2:43 AM Christoph Hellwig <hch@lst.de> wrote:
> >
> > Hi Dan, Jérôme and Jason,
> >
> > below is a series that cleans up the dev_pagemap interface so that
> > it is more easily usable, which removes the need to wrap it in hmm
> > and thus allowing to kill a lot of code
> >
> > Diffstat:
> >
> >  22 files changed, 245 insertions(+), 802 deletions(-)
> 
> Hooray!
> 
> > Git tree:
> >
> >     git://git.infradead.org/users/hch/misc.git hmm-devmem-cleanup
> 
> I just realized this collides with the dev_pagemap release rework in
> Andrew's tree (commit ids below are from next.git and are not stable)
> 
> 4422ee8476f0 mm/devm_memremap_pages: fix final page put race
> 771f0714d0dc PCI/P2PDMA: track pgmap references per resource, not globally
> af37085de906 lib/genalloc: introduce chunk owners
> e0047ff8aa77 PCI/P2PDMA: fix the gen_pool_add_virt() failure path
> 0315d47d6ae9 mm/devm_memremap_pages: introduce devm_memunmap_pages
> 216475c7eaa8 drivers/base/devres: introduce devm_release_action()
> 
> CONFLICT (content): Merge conflict in tools/testing/nvdimm/test/iomap.c
> CONFLICT (content): Merge conflict in mm/hmm.c
> CONFLICT (content): Merge conflict in kernel/memremap.c
> CONFLICT (content): Merge conflict in include/linux/memremap.h
> CONFLICT (content): Merge conflict in drivers/pci/p2pdma.c
> CONFLICT (content): Merge conflict in drivers/nvdimm/pmem.c
> CONFLICT (content): Merge conflict in drivers/dax/device.c
> CONFLICT (content): Merge conflict in drivers/dax/dax-private.h
> 
> Perhaps we should pull those out and resend them through hmm.git?

It could be done - but how bad is the conflict resolution?

I'd be more comfortable to take a PR from you to merge into hmm.git,
rather than raw patches, then apply CH's series on top. I think.

That way if something goes wrong you can send your PR to Linus
directly.

> It also turns out the nvdimm unit tests crash with this signature on
> that branch where base v5.2-rc3 passes:
> 
>     BUG: kernel NULL pointer dereference, address: 0000000000000008
>     [..]
>     CPU: 15 PID: 1414 Comm: lt-libndctl Tainted: G           OE
> 5.2.0-rc3+ #3399
>     Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
>     RIP: 0010:percpu_ref_kill_and_confirm+0x1e/0x180
>     [..]
>     Call Trace:
>      release_nodes+0x234/0x280
>      device_release_driver_internal+0xe8/0x1b0
>      bus_remove_device+0xf2/0x160
>      device_del+0x166/0x370
>      unregister_dev_dax+0x23/0x50
>      release_nodes+0x234/0x280
>      device_release_driver_internal+0xe8/0x1b0
>      unbind_store+0x94/0x120
>      kernfs_fop_write+0xf0/0x1a0
>      vfs_write+0xb7/0x1b0
>      ksys_write+0x5c/0xd0
>      do_syscall_64+0x60/0x240

Too bad the trace didn't say which devm cleanup triggered it.. Did
dev_pagemap_percpu_exit get called with a NULL pgmap->ref ?

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 20:24       ` Logan Gunthorpe
@ 2019-06-13 20:48         ` Andrew Morton
  0 siblings, 0 replies; 103+ messages in thread
From: Andrew Morton @ 2019-06-13 20:48 UTC (permalink / raw)
  To: Logan Gunthorpe
  Cc: Dan Williams, Christoph Hellwig, linux-nvdimm, nouveau,
	Linux Kernel Mailing List, Maling list - DRI developers,
	Linux MM, Jérôme Glisse, Jason Gunthorpe, Ben Skeggs,
	linux-pci

On Thu, 13 Jun 2019 14:24:20 -0600 Logan Gunthorpe <logang@deltatee.com> wrote:

> 
> 
> On 2019-06-13 2:21 p.m., Dan Williams wrote:
> > On Thu, Jun 13, 2019 at 1:18 PM Logan Gunthorpe <logang@deltatee.com> wrote:
> >>
> >>
> >>
> >> On 2019-06-13 12:27 p.m., Dan Williams wrote:
> >>> On Thu, Jun 13, 2019 at 2:43 AM Christoph Hellwig <hch@lst.de> wrote:
> >>>>
> >>>> Hi Dan, Jérôme and Jason,
> >>>>
> >>>> below is a series that cleans up the dev_pagemap interface so that
> >>>> it is more easily usable, which removes the need to wrap it in hmm
> >>>> and thus allowing to kill a lot of code
> >>>>
> >>>> Diffstat:
> >>>>
> >>>>  22 files changed, 245 insertions(+), 802 deletions(-)
> >>>
> >>> Hooray!
> >>>
> >>>> Git tree:
> >>>>
> >>>>     git://git.infradead.org/users/hch/misc.git hmm-devmem-cleanup
> >>>
> >>> I just realized this collides with the dev_pagemap release rework in
> >>> Andrew's tree (commit ids below are from next.git and are not stable)
> >>>
> >>> 4422ee8476f0 mm/devm_memremap_pages: fix final page put race
> >>> 771f0714d0dc PCI/P2PDMA: track pgmap references per resource, not globally
> >>> af37085de906 lib/genalloc: introduce chunk owners
> >>> e0047ff8aa77 PCI/P2PDMA: fix the gen_pool_add_virt() failure path
> >>> 0315d47d6ae9 mm/devm_memremap_pages: introduce devm_memunmap_pages
> >>> 216475c7eaa8 drivers/base/devres: introduce devm_release_action()
> >>>
> >>> CONFLICT (content): Merge conflict in tools/testing/nvdimm/test/iomap.c
> >>> CONFLICT (content): Merge conflict in mm/hmm.c
> >>> CONFLICT (content): Merge conflict in kernel/memremap.c
> >>> CONFLICT (content): Merge conflict in include/linux/memremap.h
> >>> CONFLICT (content): Merge conflict in drivers/pci/p2pdma.c
> >>> CONFLICT (content): Merge conflict in drivers/nvdimm/pmem.c
> >>> CONFLICT (content): Merge conflict in drivers/dax/device.c
> >>> CONFLICT (content): Merge conflict in drivers/dax/dax-private.h
> >>>
> >>> Perhaps we should pull those out and resend them through hmm.git?
> >>
> >> Hmm, I've been waiting for those patches to get in for a little while now ;(
> > 
> > Unless Andrew was going to submit as v5.2-rc fixes I think I should
> > rebase / submit them on current hmm.git and then throw these cleanups
> > from Christoph on top?
> 
> Whatever you feel is best. I'm just hoping they get in sooner rather
> than later. They do fix a bug after all. Let me know if you want me to
> retest the P2PDMA stuff after the rebase.

I had them down for 5.3-rc1.  I'll send them along for 5.2-rc5 instead.


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 20:40   ` Jason Gunthorpe
@ 2019-06-13 21:21     ` Christoph Hellwig
  2019-06-13 23:10       ` Jason Gunthorpe
  2019-06-14  0:31     ` Ira Weiny
  1 sibling, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-13 21:21 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Dan Williams, Christoph Hellwig, Jérôme Glisse,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List,
	Andrew Morton

On Thu, Jun 13, 2019 at 08:40:46PM +0000, Jason Gunthorpe wrote:
> > Perhaps we should pull those out and resend them through hmm.git?
> 
> It could be done - but how bad is the conflict resolution?

Trivial.  All but one patch just apply using git-am, and the other one
just has a few lines of offsets.

I've also done a preliminary rebase of my series on top of those
patches, and it really nicely helps making the drivers even simpler
and allows using the internal refcount in p2pdma.c as well.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [Nouveau] [PATCH 02/22] mm: remove the struct hmm_device infrastructure
  2019-06-13  9:43 ` [PATCH 02/22] mm: remove the struct hmm_device infrastructure Christoph Hellwig
  2019-06-13 18:46   ` Jason Gunthorpe
@ 2019-06-13 23:06   ` " John Hubbard
  1 sibling, 0 replies; 103+ messages in thread
From: John Hubbard @ 2019-06-13 23:06 UTC (permalink / raw)
  To: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs
  Cc: linux-nvdimm, linux-pci, linux-kernel, dri-devel, linux-mm, nouveau

On 6/13/19 2:43 AM, Christoph Hellwig wrote:
> This code is a trivial wrapper around device model helpers, which
> should have been integrated into the driver device model usage from
> the start.  Assuming it actually had users, which it never had since
> the code was added more than 1 1/2 years ago.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  include/linux/hmm.h | 20 ------------
>  mm/hmm.c            | 80 ---------------------------------------------
>  2 files changed, 100 deletions(-)
> 

Yes. This code is definitely unnecessary, and it's a good housecleaning here.

(As to the history: I know that there was some early "HMM dummy device" 
testing when the HMM code was much younger, but such testing has long since
been superseded by more elaborate testing with real drivers.)


Reviewed-by: John Hubbard <jhubbard@nvidia.com> 


thanks,
-- 
John Hubbard
NVIDIA

> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 0fa8ea34ccef..4867b9da1b6c 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -717,26 +717,6 @@ static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page)
>  {
>  	return page->hmm_data;
>  }
> -
> -
> -/*
> - * struct hmm_device - fake device to hang device memory onto
> - *
> - * @device: device struct
> - * @minor: device minor number
> - */
> -struct hmm_device {
> -	struct device		device;
> -	unsigned int		minor;
> -};
> -
> -/*
> - * A device driver that wants to handle multiple devices memory through a
> - * single fake device can use hmm_device to do so. This is purely a helper and
> - * it is not strictly needed, in order to make use of any HMM functionality.
> - */
> -struct hmm_device *hmm_device_new(void *drvdata);
> -void hmm_device_put(struct hmm_device *hmm_device);
>  #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
>  #else /* IS_ENABLED(CONFIG_HMM) */
>  static inline void hmm_mm_destroy(struct mm_struct *mm) {}
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 886b18695b97..ff2598eb7377 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -1499,84 +1499,4 @@ struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
>  	return devmem;
>  }
>  EXPORT_SYMBOL_GPL(hmm_devmem_add_resource);
> -
> -/*
> - * A device driver that wants to handle multiple devices memory through a
> - * single fake device can use hmm_device to do so. This is purely a helper
> - * and it is not needed to make use of any HMM functionality.
> - */
> -#define HMM_DEVICE_MAX 256
> -
> -static DECLARE_BITMAP(hmm_device_mask, HMM_DEVICE_MAX);
> -static DEFINE_SPINLOCK(hmm_device_lock);
> -static struct class *hmm_device_class;
> -static dev_t hmm_device_devt;
> -
> -static void hmm_device_release(struct device *device)
> -{
> -	struct hmm_device *hmm_device;
> -
> -	hmm_device = container_of(device, struct hmm_device, device);
> -	spin_lock(&hmm_device_lock);
> -	clear_bit(hmm_device->minor, hmm_device_mask);
> -	spin_unlock(&hmm_device_lock);
> -
> -	kfree(hmm_device);
> -}
> -
> -struct hmm_device *hmm_device_new(void *drvdata)
> -{
> -	struct hmm_device *hmm_device;
> -
> -	hmm_device = kzalloc(sizeof(*hmm_device), GFP_KERNEL);
> -	if (!hmm_device)
> -		return ERR_PTR(-ENOMEM);
> -
> -	spin_lock(&hmm_device_lock);
> -	hmm_device->minor = find_first_zero_bit(hmm_device_mask, HMM_DEVICE_MAX);
> -	if (hmm_device->minor >= HMM_DEVICE_MAX) {
> -		spin_unlock(&hmm_device_lock);
> -		kfree(hmm_device);
> -		return ERR_PTR(-EBUSY);
> -	}
> -	set_bit(hmm_device->minor, hmm_device_mask);
> -	spin_unlock(&hmm_device_lock);
> -
> -	dev_set_name(&hmm_device->device, "hmm_device%d", hmm_device->minor);
> -	hmm_device->device.devt = MKDEV(MAJOR(hmm_device_devt),
> -					hmm_device->minor);
> -	hmm_device->device.release = hmm_device_release;
> -	dev_set_drvdata(&hmm_device->device, drvdata);
> -	hmm_device->device.class = hmm_device_class;
> -	device_initialize(&hmm_device->device);
> -
> -	return hmm_device;
> -}
> -EXPORT_SYMBOL(hmm_device_new);
> -
> -void hmm_device_put(struct hmm_device *hmm_device)
> -{
> -	put_device(&hmm_device->device);
> -}
> -EXPORT_SYMBOL(hmm_device_put);
> -
> -static int __init hmm_init(void)
> -{
> -	int ret;
> -
> -	ret = alloc_chrdev_region(&hmm_device_devt, 0,
> -				  HMM_DEVICE_MAX,
> -				  "hmm_device");
> -	if (ret)
> -		return ret;
> -
> -	hmm_device_class = class_create(THIS_MODULE, "hmm_device");
> -	if (IS_ERR(hmm_device_class)) {
> -		unregister_chrdev_region(hmm_device_devt, HMM_DEVICE_MAX);
> -		return PTR_ERR(hmm_device_class);
> -	}
> -	return 0;
> -}
> -
> -device_initcall(hmm_init);
>  #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
> 

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 21:21     ` Christoph Hellwig
@ 2019-06-13 23:10       ` Jason Gunthorpe
  2019-06-14  6:14         ` Christoph Hellwig
  0 siblings, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-13 23:10 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Ben Skeggs, Linux MM,
	nouveau, Maling list - DRI developers, linux-nvdimm, linux-pci,
	Linux Kernel Mailing List, Andrew Morton

On Thu, Jun 13, 2019 at 11:21:01PM +0200, Christoph Hellwig wrote:
> On Thu, Jun 13, 2019 at 08:40:46PM +0000, Jason Gunthorpe wrote:
> > > Perhaps we should pull those out and resend them through hmm.git?
> > 
> > It could be done - but how bad is the conflict resolution?
> 
> Trivial.  All but one patch just apply using git-am, and the other one
> just has a few lines of offsets.

Okay, NP then, trivial ones are OK to send to Linus..

If Andrew gets them into -rc5 then I will get rc5 into hmm.git next
week.

Thanks,
Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 10/22] memremap: add a migrate callback to struct dev_pagemap_ops
  2019-06-13  9:43 ` [PATCH 10/22] memremap: add a migrate callback to struct dev_pagemap_ops Christoph Hellwig
@ 2019-06-13 23:42   ` Ralph Campbell
  2019-06-14  6:33     ` Christoph Hellwig
  0 siblings, 1 reply; 103+ messages in thread
From: Ralph Campbell @ 2019-06-13 23:42 UTC (permalink / raw)
  To: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel


On 6/13/19 2:43 AM, Christoph Hellwig wrote:
> This replaces the hacky ->fault callback, which is currently directly
> called from common code through a hmm specific data structure as an
> exercise in layering violations.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   include/linux/hmm.h      |  6 ------
>   include/linux/memremap.h |  6 ++++++
>   include/linux/swapops.h  | 15 ---------------
>   kernel/memremap.c        | 31 -------------------------------
>   mm/hmm.c                 | 13 +++++--------
>   mm/memory.c              |  9 ++-------
>   6 files changed, 13 insertions(+), 67 deletions(-)
> 
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 5761a39221a6..3c9a59dbfdb8 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -658,11 +658,6 @@ struct hmm_devmem_ops {
>    * chunk, as an optimization. It must, however, prioritize the faulting address
>    * over all the others.
>    */
> -typedef vm_fault_t (*dev_page_fault_t)(struct vm_area_struct *vma,
> -				unsigned long addr,
> -				const struct page *page,
> -				unsigned int flags,
> -				pmd_t *pmdp);
>   
>   struct hmm_devmem {
>   	struct completion		completion;
> @@ -673,7 +668,6 @@ struct hmm_devmem {
>   	struct dev_pagemap		pagemap;
>   	const struct hmm_devmem_ops	*ops;
>   	struct percpu_ref		ref;
> -	dev_page_fault_t		page_fault;
>   };
>   
>   /*
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index 96a3a6d564ad..03a4099be701 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -75,6 +75,12 @@ struct dev_pagemap_ops {
>   	 * Transition the percpu_ref in struct dev_pagemap to the dead state.
>   	 */
>   	void (*kill)(struct dev_pagemap *pgmap);
> +
> +	/*
> +	 * Used for private (un-addressable) device memory only.  Must migrate
> +	 * the page back to a CPU accessible page.
> +	 */
> +	vm_fault_t (*migrate)(struct vm_fault *vmf);
>   };
>   
>   /**
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 4d961668e5fc..15bdb6fe71e5 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -129,12 +129,6 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
>   {
>   	return pfn_to_page(swp_offset(entry));
>   }
> -
> -vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
> -		       unsigned long addr,
> -		       swp_entry_t entry,
> -		       unsigned int flags,
> -		       pmd_t *pmdp);
>   #else /* CONFIG_DEVICE_PRIVATE */
>   static inline swp_entry_t make_device_private_entry(struct page *page, bool write)
>   {
> @@ -164,15 +158,6 @@ static inline struct page *device_private_entry_to_page(swp_entry_t entry)
>   {
>   	return NULL;
>   }
> -
> -static inline vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
> -				     unsigned long addr,
> -				     swp_entry_t entry,
> -				     unsigned int flags,
> -				     pmd_t *pmdp)
> -{
> -	return VM_FAULT_SIGBUS;
> -}
>   #endif /* CONFIG_DEVICE_PRIVATE */
>   
>   #ifdef CONFIG_MIGRATION
> diff --git a/kernel/memremap.c b/kernel/memremap.c
> index 6a3183cac764..7167e717647d 100644
> --- a/kernel/memremap.c
> +++ b/kernel/memremap.c
> @@ -11,7 +11,6 @@
>   #include <linux/types.h>
>   #include <linux/wait_bit.h>
>   #include <linux/xarray.h>
> -#include <linux/hmm.h>
>   
>   static DEFINE_XARRAY(pgmap_array);
>   #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
> @@ -48,36 +47,6 @@ static inline int dev_pagemap_enable(struct device *dev)
>   }
>   #endif /* CONFIG_DEV_PAGEMAP_OPS */
>   
> -#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
> -vm_fault_t device_private_entry_fault(struct vm_area_struct *vma,
> -		       unsigned long addr,
> -		       swp_entry_t entry,
> -		       unsigned int flags,
> -		       pmd_t *pmdp)
> -{
> -	struct page *page = device_private_entry_to_page(entry);
> -	struct hmm_devmem *devmem;
> -
> -	devmem = container_of(page->pgmap, typeof(*devmem), pagemap);
> -
> -	/*
> -	 * The page_fault() callback must migrate page back to system memory
> -	 * so that CPU can access it. This might fail for various reasons
> -	 * (device issue, device was unsafely unplugged, ...). When such
> -	 * error conditions happen, the callback must return VM_FAULT_SIGBUS.
> -	 *
> -	 * Note that because memory cgroup charges are accounted to the device
> -	 * memory, this should never fail because of memory restrictions (but
> -	 * allocation of regular system page might still fail because we are
> -	 * out of memory).
> -	 *
> -	 * There is a more in-depth description of what that callback can and
> -	 * cannot do, in include/linux/memremap.h
> -	 */
> -	return devmem->page_fault(vma, addr, page, flags, pmdp);
> -}
> -#endif /* CONFIG_DEVICE_PRIVATE */
> -
>   static void pgmap_array_delete(struct resource *res)
>   {
>   	xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 6dc769feb2e1..aab799677c7d 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -1330,15 +1330,12 @@ static void hmm_devmem_ref_kill(struct dev_pagemap *pgmap)
>   	percpu_ref_kill(pgmap->ref);
>   }
>   
> -static vm_fault_t hmm_devmem_fault(struct vm_area_struct *vma,
> -			    unsigned long addr,
> -			    const struct page *page,
> -			    unsigned int flags,
> -			    pmd_t *pmdp)
> +static vm_fault_t hmm_devmem_migrate(struct vm_fault *vmf)
>   {
> -	struct hmm_devmem *devmem = page->pgmap->data;
> +	struct hmm_devmem *devmem = vmf->page->pgmap->data;
>   
> -	return devmem->ops->fault(devmem, vma, addr, page, flags, pmdp);
> +	return devmem->ops->fault(devmem, vmf->vma, vmf->address, vmf->page,
> +			vmf->flags, vmf->pmd);
>   }
>   
>   static void hmm_devmem_free(struct page *page, void *data)
> @@ -1351,6 +1348,7 @@ static void hmm_devmem_free(struct page *page, void *data)
>   static const struct dev_pagemap_ops hmm_pagemap_ops = {
>   	.page_free		= hmm_devmem_free,
>   	.kill			= hmm_devmem_ref_kill,
> +	.migrate		= hmm_devmem_migrate,
>   };
>   
>   /*
> @@ -1405,7 +1403,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
>   	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
>   	devmem->pfn_last = devmem->pfn_first +
>   			   (resource_size(devmem->resource) >> PAGE_SHIFT);
> -	devmem->page_fault = hmm_devmem_fault;
>   
>   	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
>   	devmem->pagemap.res = *devmem->resource;
> diff --git a/mm/memory.c b/mm/memory.c
> index ddf20bd0c317..cbf3cb598436 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2782,13 +2782,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>   			migration_entry_wait(vma->vm_mm, vmf->pmd,
>   					     vmf->address);
>   		} else if (is_device_private_entry(entry)) {
> -			/*
> -			 * For un-addressable device memory we call the pgmap
> -			 * fault handler callback. The callback must migrate
> -			 * the page back to some CPU accessible page.
> -			 */
> -			ret = device_private_entry_fault(vma, vmf->address, entry,
> -						 vmf->flags, vmf->pmd);
> +			vmf->page = device_private_entry_to_page(entry);
> +			ret = page->pgmap->ops->migrate(vmf);

This needs to either initialize "page" or be changed to "vmf->page".
Otherwise, it is a NULL pointer dereference.

>   		} else if (is_hwpoison_entry(entry)) {
>   			ret = VM_FAULT_HWPOISON;
>   		} else {
> 

You can add:
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 13/22] device-dax: use the dev_pagemap internal refcount
  2019-06-13  9:43 ` [PATCH 13/22] device-dax: use the dev_pagemap internal refcount Christoph Hellwig
@ 2019-06-14  0:22   ` Ira Weiny
  2019-06-14  6:35     ` Christoph Hellwig
  0 siblings, 1 reply; 103+ messages in thread
From: Ira Weiny @ 2019-06-14  0:22 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, linux-nvdimm, linux-pci, linux-kernel, dri-devel,
	linux-mm, nouveau

On Thu, Jun 13, 2019 at 11:43:16AM +0200, Christoph Hellwig wrote:
> The functionality is identical to the one currently open coded in
> device-dax.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/dax/dax-private.h |  4 ---
>  drivers/dax/device.c      | 52 +--------------------------------------
>  2 files changed, 1 insertion(+), 55 deletions(-)
> 
> diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
> index a45612148ca0..ed04a18a35be 100644
> --- a/drivers/dax/dax-private.h
> +++ b/drivers/dax/dax-private.h
> @@ -51,8 +51,6 @@ struct dax_region {
>   * @target_node: effective numa node if dev_dax memory range is onlined
>   * @dev - device core
>   * @pgmap - pgmap for memmap setup / lifetime (driver owned)
> - * @ref: pgmap reference count (driver owned)
> - * @cmp: @ref final put completion (driver owned)
>   */
>  struct dev_dax {
>  	struct dax_region *region;
> @@ -60,8 +58,6 @@ struct dev_dax {
>  	int target_node;
>  	struct device dev;
>  	struct dev_pagemap pgmap;
> -	struct percpu_ref ref;
> -	struct completion cmp;
>  };
>  
>  static inline struct dev_dax *to_dev_dax(struct device *dev)
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index e23fa1bd8c97..a9d7c90ecf1e 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -14,37 +14,6 @@
>  #include "dax-private.h"
>  #include "bus.h"
>  
> -static struct dev_dax *ref_to_dev_dax(struct percpu_ref *ref)
> -{
> -	return container_of(ref, struct dev_dax, ref);
> -}
> -
> -static void dev_dax_percpu_release(struct percpu_ref *ref)
> -{
> -	struct dev_dax *dev_dax = ref_to_dev_dax(ref);
> -
> -	dev_dbg(&dev_dax->dev, "%s\n", __func__);
> -	complete(&dev_dax->cmp);
> -}
> -
> -static void dev_dax_percpu_exit(void *data)
> -{
> -	struct percpu_ref *ref = data;
> -	struct dev_dax *dev_dax = ref_to_dev_dax(ref);
> -
> -	dev_dbg(&dev_dax->dev, "%s\n", __func__);
> -	wait_for_completion(&dev_dax->cmp);
> -	percpu_ref_exit(ref);
> -}
> -
> -static void dev_dax_percpu_kill(struct dev_pagemap *pgmap)
> -{
> -	struct dev_dax *dev_dax = container_of(pgmap, struct dev_dax, pgmap);
> -
> -	dev_dbg(&dev_dax->dev, "%s\n", __func__);
> -	percpu_ref_kill(pgmap->ref);
> -}
> -
>  static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
>  		const char *func)
>  {
> @@ -442,10 +411,6 @@ static void dev_dax_kill(void *dev_dax)
>  	kill_dev_dax(dev_dax);
>  }
>  
> -static const struct dev_pagemap_ops dev_dax_pagemap_ops = {
> -	.kill		= dev_dax_percpu_kill,
> -};
> -
>  int dev_dax_probe(struct device *dev)
>  {
>  	struct dev_dax *dev_dax = to_dev_dax(dev);
> @@ -463,24 +428,9 @@ int dev_dax_probe(struct device *dev)
>  		return -EBUSY;
>  	}
>  
> -	init_completion(&dev_dax->cmp);
> -	rc = percpu_ref_init(&dev_dax->ref, dev_dax_percpu_release, 0,
> -			GFP_KERNEL);
> -	if (rc)
> -		return rc;
> -
> -	rc = devm_add_action_or_reset(dev, dev_dax_percpu_exit, &dev_dax->ref);
> -	if (rc)
> -		return rc;
> -
> -	dev_dax->pgmap.ref = &dev_dax->ref;

I don't think this exactly correct.  pgmap.ref is a pointer to the dev_dax ref
structure.  Taking it away will cause devm_memremap_pages() to fail AFAICS.

I think you need to change struct dev_pagemap as well:

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f0628660d541..5e2120589ddf 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -90,7 +90,7 @@ struct dev_pagemap {
        struct vmem_altmap altmap;
        bool altmap_valid;
        struct resource res;
-       struct percpu_ref *ref;
+       struct percpu_ref ref;
        void (*kill)(struct percpu_ref *ref);
        struct device *dev;
        void *data;

And all usages of it, right?

Ira

> -	dev_dax->pgmap.ops = &dev_dax_pagemap_ops;
>  	addr = devm_memremap_pages(dev, &dev_dax->pgmap);
> -	if (IS_ERR(addr)) {
> -		devm_remove_action(dev, dev_dax_percpu_exit, &dev_dax->ref);
> -		percpu_ref_exit(&dev_dax->ref);
> +	if (IS_ERR(addr))
>  		return PTR_ERR(addr);
> -	}
>  
>  	inode = dax_inode(dax_dev);
>  	cdev = inode->i_cdev;
> -- 
> 2.20.1
> 
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 20:40   ` Jason Gunthorpe
  2019-06-13 21:21     ` Christoph Hellwig
@ 2019-06-14  0:31     ` Ira Weiny
  1 sibling, 0 replies; 103+ messages in thread
From: Ira Weiny @ 2019-06-14  0:31 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Dan Williams, linux-nvdimm, nouveau, Linux Kernel Mailing List,
	Maling list - DRI developers, Linux MM, Jérôme Glisse,
	Ben Skeggs, linux-pci, Andrew Morton, Christoph Hellwig

On Thu, Jun 13, 2019 at 08:40:46PM +0000, Jason Gunthorpe wrote:
> On Thu, Jun 13, 2019 at 11:27:39AM -0700, Dan Williams wrote:
> > On Thu, Jun 13, 2019 at 2:43 AM Christoph Hellwig <hch@lst.de> wrote:
> > >
> > > Hi Dan, Jérôme and Jason,
> > >
> > > below is a series that cleans up the dev_pagemap interface so that
> > > it is more easily usable, which removes the need to wrap it in hmm
> > > and thus allowing to kill a lot of code
> > >
> > > Diffstat:
> > >
> > >  22 files changed, 245 insertions(+), 802 deletions(-)
> > 
> > Hooray!
> > 
> > > Git tree:
> > >
> > >     git://git.infradead.org/users/hch/misc.git hmm-devmem-cleanup
> > 
> > I just realized this collides with the dev_pagemap release rework in
> > Andrew's tree (commit ids below are from next.git and are not stable)
> > 
> > 4422ee8476f0 mm/devm_memremap_pages: fix final page put race
> > 771f0714d0dc PCI/P2PDMA: track pgmap references per resource, not globally
> > af37085de906 lib/genalloc: introduce chunk owners
> > e0047ff8aa77 PCI/P2PDMA: fix the gen_pool_add_virt() failure path
> > 0315d47d6ae9 mm/devm_memremap_pages: introduce devm_memunmap_pages
> > 216475c7eaa8 drivers/base/devres: introduce devm_release_action()
> > 
> > CONFLICT (content): Merge conflict in tools/testing/nvdimm/test/iomap.c
> > CONFLICT (content): Merge conflict in mm/hmm.c
> > CONFLICT (content): Merge conflict in kernel/memremap.c
> > CONFLICT (content): Merge conflict in include/linux/memremap.h
> > CONFLICT (content): Merge conflict in drivers/pci/p2pdma.c
> > CONFLICT (content): Merge conflict in drivers/nvdimm/pmem.c
> > CONFLICT (content): Merge conflict in drivers/dax/device.c
> > CONFLICT (content): Merge conflict in drivers/dax/dax-private.h
> > 
> > Perhaps we should pull those out and resend them through hmm.git?
> 
> It could be done - but how bad is the conflict resolution?
> 
> I'd be more comfortable to take a PR from you to merge into hmm.git,
> rather than raw patches, then apply CH's series on top. I think.
> 
> That way if something goes wrong you can send your PR to Linus
> directly.
> 
> > It also turns out the nvdimm unit tests crash with this signature on
> > that branch where base v5.2-rc3 passes:
> > 
> >     BUG: kernel NULL pointer dereference, address: 0000000000000008
> >     [..]
> >     CPU: 15 PID: 1414 Comm: lt-libndctl Tainted: G           OE
> > 5.2.0-rc3+ #3399
> >     Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
> >     RIP: 0010:percpu_ref_kill_and_confirm+0x1e/0x180
> >     [..]
> >     Call Trace:
> >      release_nodes+0x234/0x280
> >      device_release_driver_internal+0xe8/0x1b0
> >      bus_remove_device+0xf2/0x160
> >      device_del+0x166/0x370
> >      unregister_dev_dax+0x23/0x50
> >      release_nodes+0x234/0x280
> >      device_release_driver_internal+0xe8/0x1b0
> >      unbind_store+0x94/0x120
> >      kernfs_fop_write+0xf0/0x1a0
> >      vfs_write+0xb7/0x1b0
> >      ksys_write+0x5c/0xd0
> >      do_syscall_64+0x60/0x240
> 
> Too bad the trace didn't say which devm cleanup triggered it.. Did
> dev_pagemap_percpu_exit get called with a NULL pgmap->ref ?

I would guess something like that.  I did not fully wrap my head around the ref
counting there but I don't think the patch is correct.  See my review.

Ira

> 
> Jason
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-13 19:58       ` Jason Gunthorpe
@ 2019-06-14  0:43         ` Ira Weiny
  2019-06-14  1:23           ` John Hubbard
  2019-06-14  6:43           ` Christoph Hellwig
  0 siblings, 2 replies; 103+ messages in thread
From: Ira Weiny @ 2019-06-14  0:43 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Ralph Campbell, linux-nvdimm, nouveau, linux-kernel, dri-devel,
	linux-mm, Jérôme Glisse, Ben Skeggs, linux-pci,
	Christoph Hellwig

On Thu, Jun 13, 2019 at 07:58:29PM +0000, Jason Gunthorpe wrote:
> On Thu, Jun 13, 2019 at 12:53:02PM -0700, Ralph Campbell wrote:
> > 
> > On 6/13/19 12:44 PM, Jason Gunthorpe wrote:
> > > On Thu, Jun 13, 2019 at 11:43:21AM +0200, Christoph Hellwig wrote:
> > > > The code hasn't been used since it was added to the tree, and doesn't
> > > > appear to actually be usable.  Mark it as BROKEN until either a user
> > > > comes along or we finally give up on it.
> > > > 
> > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > >   mm/Kconfig | 1 +
> > > >   1 file changed, 1 insertion(+)
> > > > 
> > > > diff --git a/mm/Kconfig b/mm/Kconfig
> > > > index 0d2ba7e1f43e..406fa45e9ecc 100644
> > > > +++ b/mm/Kconfig
> > > > @@ -721,6 +721,7 @@ config DEVICE_PRIVATE
> > > >   config DEVICE_PUBLIC
> > > >   	bool "Addressable device memory (like GPU memory)"
> > > >   	depends on ARCH_HAS_HMM
> > > > +	depends on BROKEN
> > > >   	select HMM
> > > >   	select DEV_PAGEMAP_OPS
> > > 
> > > This seems a bit harsh, we do have another kconfig that selects this
> > > one today:
> > > 
> > > config DRM_NOUVEAU_SVM
> > >          bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
> > >          depends on ARCH_HAS_HMM
> > >          depends on DRM_NOUVEAU
> > >          depends on STAGING
> > >          select HMM_MIRROR
> > >          select DEVICE_PRIVATE
> > >          default n
> > >          help
> > >            Say Y here if you want to enable experimental support for
> > >            Shared Virtual Memory (SVM).
> > > 
> > > Maybe it should be depends on STAGING not broken?
> > > 
> > > or maybe nouveau_svm doesn't actually need DEVICE_PRIVATE?
> > > 
> > > Jason
> > 
> > I think you are confusing DEVICE_PRIVATE for DEVICE_PUBLIC.
> > DRM_NOUVEAU_SVM does use DEVICE_PRIVATE but not DEVICE_PUBLIC.
> 
> Indeed you are correct, never mind
> 
> Hum, so the only thing this config does is short circuit here:
> 
> static inline bool is_device_public_page(const struct page *page)
> {
>         return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
>                 IS_ENABLED(CONFIG_DEVICE_PUBLIC) &&
>                 is_zone_device_page(page) &&
>                 page->pgmap->type == MEMORY_DEVICE_PUBLIC;
> }
> 
> Which is called all over the place.. 

<sigh>  yes but the earlier patch:

[PATCH 03/22] mm: remove hmm_devmem_add_resource

Removes the only place type is set to MEMORY_DEVICE_PUBLIC.

So I think it is ok.  Frankly I was wondering if we should remove the public
type altogether but conceptually it seems ok.  But I don't see any users of it
so...  should we get rid of it in the code rather than turning the config off?

Ira

> 
> So, yes, we really don't want any distro or something to turn this on
> until it has a use.
> 
> Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
> 
> Jason
> _______________________________________________
> Linux-nvdimm mailing list
> Linux-nvdimm@lists.01.org
> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [Nouveau] [PATCH 03/22] mm: remove hmm_devmem_add_resource
  2019-06-13  9:43 ` [PATCH 03/22] mm: remove hmm_devmem_add_resource Christoph Hellwig
  2019-06-13 18:52   ` Jason Gunthorpe
@ 2019-06-14  0:54   ` " John Hubbard
  2019-06-20 19:32   ` Michal Hocko
  2 siblings, 0 replies; 103+ messages in thread
From: John Hubbard @ 2019-06-14  0:54 UTC (permalink / raw)
  To: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs
  Cc: linux-nvdimm, linux-pci, linux-kernel, dri-devel, linux-mm, nouveau

On 6/13/19 2:43 AM, Christoph Hellwig wrote:
> This function has never been used since it was first added to the kernel
> more than a year and a half ago, and if we ever grow a consumer of the
> MEMORY_DEVICE_PUBLIC infrastructure it can easily use devm_memremap_pages
> directly now that we've simplified the API for it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  include/linux/hmm.h |  3 ---
>  mm/hmm.c            | 54 ---------------------------------------------
>  2 files changed, 57 deletions(-)
> 

No objections here, good cleanup.

Reviewed-by: John Hubbard <jhubbard@nvidia.com> 

thanks,
-- 
John Hubbard
NVIDIA

> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 4867b9da1b6c..5761a39221a6 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -688,9 +688,6 @@ struct hmm_devmem {
>  struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
>  				  struct device *device,
>  				  unsigned long size);
> -struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
> -					   struct device *device,
> -					   struct resource *res);
>  
>  /*
>   * hmm_devmem_page_set_drvdata - set per-page driver data field
> diff --git a/mm/hmm.c b/mm/hmm.c
> index ff2598eb7377..0c62426d1257 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -1445,58 +1445,4 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
>  	return devmem;
>  }
>  EXPORT_SYMBOL_GPL(hmm_devmem_add);
> -
> -struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
> -					   struct device *device,
> -					   struct resource *res)
> -{
> -	struct hmm_devmem *devmem;
> -	void *result;
> -	int ret;
> -
> -	if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
> -		return ERR_PTR(-EINVAL);
> -
> -	dev_pagemap_get_ops();
> -
> -	devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL);
> -	if (!devmem)
> -		return ERR_PTR(-ENOMEM);
> -
> -	init_completion(&devmem->completion);
> -	devmem->pfn_first = -1UL;
> -	devmem->pfn_last = -1UL;
> -	devmem->resource = res;
> -	devmem->device = device;
> -	devmem->ops = ops;
> -
> -	ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release,
> -			      0, GFP_KERNEL);
> -	if (ret)
> -		return ERR_PTR(ret);
> -
> -	ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit,
> -			&devmem->ref);
> -	if (ret)
> -		return ERR_PTR(ret);
> -
> -	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
> -	devmem->pfn_last = devmem->pfn_first +
> -			   (resource_size(devmem->resource) >> PAGE_SHIFT);
> -	devmem->page_fault = hmm_devmem_fault;
> -
> -	devmem->pagemap.type = MEMORY_DEVICE_PUBLIC;
> -	devmem->pagemap.res = *devmem->resource;
> -	devmem->pagemap.page_free = hmm_devmem_free;
> -	devmem->pagemap.altmap_valid = false;
> -	devmem->pagemap.ref = &devmem->ref;
> -	devmem->pagemap.data = devmem;
> -	devmem->pagemap.kill = hmm_devmem_ref_kill;
> -
> -	result = devm_memremap_pages(devmem->device, &devmem->pagemap);
> -	if (IS_ERR(result))
> -		return result;
> -	return devmem;
> -}
> -EXPORT_SYMBOL_GPL(hmm_devmem_add_resource);
>  #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
> 

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-14  0:43         ` Ira Weiny
@ 2019-06-14  1:23           ` John Hubbard
  2019-06-19 19:27             ` Jason Gunthorpe
  2019-06-14  6:43           ` Christoph Hellwig
  1 sibling, 1 reply; 103+ messages in thread
From: John Hubbard @ 2019-06-14  1:23 UTC (permalink / raw)
  To: Ira Weiny, Jason Gunthorpe
  Cc: Ralph Campbell, linux-nvdimm, nouveau, linux-kernel, dri-devel,
	linux-mm, Jérôme Glisse, Ben Skeggs, linux-pci,
	Christoph Hellwig

On 6/13/19 5:43 PM, Ira Weiny wrote:
> On Thu, Jun 13, 2019 at 07:58:29PM +0000, Jason Gunthorpe wrote:
>> On Thu, Jun 13, 2019 at 12:53:02PM -0700, Ralph Campbell wrote:
>>>
...
>> Hum, so the only thing this config does is short circuit here:
>>
>> static inline bool is_device_public_page(const struct page *page)
>> {
>>         return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
>>                 IS_ENABLED(CONFIG_DEVICE_PUBLIC) &&
>>                 is_zone_device_page(page) &&
>>                 page->pgmap->type == MEMORY_DEVICE_PUBLIC;
>> }
>>
>> Which is called all over the place.. 
> 
> <sigh>  yes but the earlier patch:
> 
> [PATCH 03/22] mm: remove hmm_devmem_add_resource
> 
> Removes the only place type is set to MEMORY_DEVICE_PUBLIC.
> 
> So I think it is ok.  Frankly I was wondering if we should remove the public
> type altogether but conceptually it seems ok.  But I don't see any users of it
> so...  should we get rid of it in the code rather than turning the config off?
> 
> Ira

That seems reasonable. I recall that the hope was for those IBM Power 9
systems to use _PUBLIC, as they have hardware-based coherent device (GPU)
memory, and so the memory really is visible to the CPU. And the IBM team
was thinking of taking advantage of it. But I haven't seen anything on
that front for a while.

So maybe it will get re-added as part of a future patchset to use that
kind of memory, but yes, we should not hesitate to clean house at this
point, and delete unused code.


thanks,
-- 
John Hubbard
NVIDIA

> 
>>
>> So, yes, we really don't want any distro or something to turn this on
>> until it has a use.
>>
>> Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
>>
>> Jason
>> _______________________________________________
>> Linux-nvdimm mailing list
>> Linux-nvdimm@lists.01.org
>> https://lists.01.org/mailman/listinfo/linux-nvdimm

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free
  2019-06-13  9:43 ` [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free Christoph Hellwig
  2019-06-13 19:05   ` Jason Gunthorpe
@ 2019-06-14  1:46   ` John Hubbard
  2019-06-20 19:36   ` Michal Hocko
  2 siblings, 0 replies; 103+ messages in thread
From: John Hubbard @ 2019-06-14  1:46 UTC (permalink / raw)
  To: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On 6/13/19 2:43 AM, Christoph Hellwig wrote:
> ->mapping isn't even used by HMM users, and the field at the same offset
> in the zone_device part of the union is declared as pad.  (Which btw is
> rather confusing, as DAX uses ->pgmap and ->mapping from two different
> sides of the union, but DAX doesn't use hmm_devmem_free).
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  mm/hmm.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 0c62426d1257..e1dc98407e7b 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -1347,8 +1347,6 @@ static void hmm_devmem_free(struct page *page, void *data)
>  {
>  	struct hmm_devmem *devmem = data;
>  
> -	page->mapping = NULL;
> -
>  	devmem->ops->free(devmem, page);
>  }
>  
> 

Yes, I think that line was unnecessary. I see from git history that it was
originally being set to NULL from within __put_devmap_managed_page(), and then
in commit 2fa147bdbf672c53386a8f5f2c7fe358004c3ef8, Dan moved it out of there,
and stashed in specifically here. But it appears to have been unnecessary from
the beginning.

Reviewed-by: John Hubbard <jhubbard@nvidia.com> 

thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-13  9:43 ` [PATCH 05/22] mm: export alloc_pages_vma Christoph Hellwig
@ 2019-06-14  1:47   ` John Hubbard
  2019-06-14  6:23     ` Christoph Hellwig
  2019-06-20 19:17   ` Michal Hocko
  1 sibling, 1 reply; 103+ messages in thread
From: John Hubbard @ 2019-06-14  1:47 UTC (permalink / raw)
  To: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On 6/13/19 2:43 AM, Christoph Hellwig wrote:
> noveau is currently using this through an odd hmm wrapper, and I plan

  "nouveau"

> to switch it to the real thing later in this series.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---

Reviewed-by: John Hubbard <jhubbard@nvidia.com> 

thanks,
-- 
John Hubbard
NVIDIA

>  mm/mempolicy.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 01600d80ae01..f9023b5fba37 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -2098,6 +2098,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  out:
>  	return page;
>  }
> +EXPORT_SYMBOL_GPL(alloc_pages_vma);
>  
>  /**
>   * 	alloc_pages_current - Allocate pages.
> 

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [Nouveau] [PATCH 22/22] mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR
  2019-06-13  9:43 ` [PATCH 22/22] mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR Christoph Hellwig
  2019-06-13 20:04   ` Jason Gunthorpe
@ 2019-06-14  1:53   ` " John Hubbard
  2019-06-14  6:48     ` Christoph Hellwig
  1 sibling, 1 reply; 103+ messages in thread
From: John Hubbard @ 2019-06-14  1:53 UTC (permalink / raw)
  To: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs
  Cc: linux-nvdimm, linux-pci, linux-kernel, dri-devel, linux-mm, nouveau

On 6/13/19 2:43 AM, Christoph Hellwig wrote:
> The migrate_vma helper is only used by noveau to migrate device private
> pages around.  Other HMM_MIRROR users like amdgpu or infiniband don't
> need it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/gpu/drm/nouveau/Kconfig | 1 +
>  mm/Kconfig                      | 1 -
>  2 files changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
> index 66c839d8e9d1..96b9814e6d06 100644
> --- a/drivers/gpu/drm/nouveau/Kconfig
> +++ b/drivers/gpu/drm/nouveau/Kconfig
> @@ -88,6 +88,7 @@ config DRM_NOUVEAU_SVM
>  	depends on DRM_NOUVEAU
>  	depends on HMM_MIRROR
>  	depends on STAGING
> +	select MIGRATE_VMA_HELPER
>  	default n
>  	help
>  	  Say Y here if you want to enable experimental support for
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 73676cb4693f..eca88679b624 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -679,7 +679,6 @@ config HMM_MIRROR
>  	bool "HMM mirror CPU page table into a device page table"
>  	depends on MMU
>  	select MMU_NOTIFIER
> -	select MIGRATE_VMA_HELPER
>  	help
>  	  Select HMM_MIRROR if you want to mirror range of the CPU page table of a
>  	  process into a device page table. Here, mirror means "keep synchronized".
> 

For those who have out of tree drivers that need migrate_vma(), but are not
Nouveau, could we pretty please allow a way to select that independently?

It's not a big deal, as I expect the Nouveau option will normally be selected, 
but it would be nice. Because there is a valid configuration that involves 
Nouveau not being selected, but our driver still wanting to run.

Maybe we can add something like this on top of what you have?

diff --git a/mm/Kconfig b/mm/Kconfig
index eca88679b624..330996632513 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -670,7 +670,10 @@ config ZONE_DEVICE
          If FS_DAX is enabled, then say Y.
 
 config MIGRATE_VMA_HELPER
-       bool
+       bool "migrate_vma() helper routine"
+       help
+         Provides a migrate_vma() routine that GPUs and other
+         device drivers may need.
 
 config DEV_PAGEMAP_OPS
        bool



thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 14:16 ` dev_pagemap related cleanups Jason Gunthorpe
@ 2019-06-14  6:12   ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:12 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 02:16:27PM +0000, Jason Gunthorpe wrote:
> On Thu, Jun 13, 2019 at 11:43:03AM +0200, Christoph Hellwig wrote:
> > Hi Dan, Jérôme and Jason,
> > 
> > below is a series that cleans up the dev_pagemap interface so that
> > it is more easily usable, which removes the need to wrap it in hmm
> > and thus allowing to kill a lot of code
> 
> Do you want some of this to run through hmm.git? I see many patches
> that don't seem to have inter-dependencies..

I think running it through hmm.git makes sense.  While there are not
actual functional dependency and just a few cosmetic conflicts keeping
the hmm stuff together makes a lot of sense.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 18:27 ` Dan Williams
  2019-06-13 20:17   ` Logan Gunthorpe
  2019-06-13 20:40   ` Jason Gunthorpe
@ 2019-06-14  6:13   ` Christoph Hellwig
  2019-06-15  1:14     ` Dan Williams
  2 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:13 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List,
	Andrew Morton

On Thu, Jun 13, 2019 at 11:27:39AM -0700, Dan Williams wrote:
> It also turns out the nvdimm unit tests crash with this signature on
> that branch where base v5.2-rc3 passes:

How do you run that test?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-13 23:10       ` Jason Gunthorpe
@ 2019-06-14  6:14         ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:14 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List,
	Andrew Morton

On Thu, Jun 13, 2019 at 11:10:45PM +0000, Jason Gunthorpe wrote:
> Okay, NP then, trivial ones are OK to send to Linus..
> 
> If Andrew gets them into -rc5 then I will get rc5 into hmm.git next
> week.

If I interpret Andrews mails from last night correctly he just sent
them to Linus.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 03/22] mm: remove hmm_devmem_add_resource
  2019-06-13 18:52   ` Jason Gunthorpe
@ 2019-06-14  6:19     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:19 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 06:52:44PM +0000, Jason Gunthorpe wrote:
> On Thu, Jun 13, 2019 at 11:43:06AM +0200, Christoph Hellwig wrote:
> > This function has never been used since it was first added to the kernel
> > more than a year and a half ago, and if we ever grow a consumer of the
> > MEMORY_DEVICE_PUBLIC infrastructure it can easily use devm_memremap_pages
> > directly now that we've simplified the API for it.
> 
> nit: Have we simplified the interface for devm_memremap_pages() at
> this point, or are you talking about later patches in this series.

After this series.  I've just droped that part of the sentence to
avoid confusion.

> I checked this and all the called functions are exported symbols, so
> there is no blocker for a future driver to call devm_memremap_pages(),
> maybe even with all this boiler plate, in future.
> 
> If we eventually get many users that need some simplified registration
> then we should add a devm_memremap_pages_simplified() interface and
> factor out that code when we can see the pattern.

After this series devm_memremap_pages already is simpler to use than
hmm_device_add_resource was before, so I'm not worried at all.  The
series actually net removes lines from noveau (if only a few).

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free
  2019-06-13 19:05   ` Jason Gunthorpe
@ 2019-06-14  6:21     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:21 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 07:05:07PM +0000, Jason Gunthorpe wrote:
> Hurm, is hmm following this comment from mm_types.h?
> 
>  * If you allocate the page using alloc_pages(), you can use some of the
>  * space in struct page for your own purposes.  The five words in the main
>  * union are available, except for bit 0 of the first word which must be
>  * kept clear.  Many users use this word to store a pointer to an object
>  * which is guaranteed to be aligned.  If you use the same storage as
>  * page->mapping, you must restore it to NULL before freeing the page.
> 
> Maybe the assumption was that a driver is using ->mapping ?

Maybe.  The union layou in struct page certainly doesn't help..

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-14  1:47   ` John Hubbard
@ 2019-06-14  6:23     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:23 UTC (permalink / raw)
  To: John Hubbard
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs, linux-mm, nouveau, dri-devel,
	linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 06:47:57PM -0700, John Hubbard wrote:
> On 6/13/19 2:43 AM, Christoph Hellwig wrote:
> > noveau is currently using this through an odd hmm wrapper, and I plan
> 
>   "nouveau"

Meh, I keep misspelling that name.  I've already fixed it up a few times
for this series along.


^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper
  2019-06-13 19:16   ` Jason Gunthorpe
@ 2019-06-14  6:24     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:24 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 07:16:35PM +0000, Jason Gunthorpe wrote:
> I wonder if IORES_DESC_DEVICE_PRIVATE_MEMORY should be a function
> argument?

No.  The only reason to use this function is to allocate the fake
physical address space for the device private memory case.  If you'd
deal with real resources you'd use the normal resource allocator.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 09/22] memremap: lift the devmap_enable manipulation into devm_memremap_pages
  2019-06-13 19:34   ` Jason Gunthorpe
  2019-06-13 20:13     ` Dan Williams
@ 2019-06-14  6:28     ` Christoph Hellwig
  1 sibling, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:28 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 07:34:31PM +0000, Jason Gunthorpe wrote:
> Reviewed-by: Christoph Hellwig <hch@lst.de>

Really? :)

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 10/22] memremap: add a migrate callback to struct dev_pagemap_ops
  2019-06-13 23:42   ` Ralph Campbell
@ 2019-06-14  6:33     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:33 UTC (permalink / raw)
  To: Ralph Campbell
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs, linux-mm, nouveau, dri-devel,
	linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 04:42:07PM -0700, Ralph Campbell wrote:
> This needs to either initialize "page" or be changed to "vmf->page".
> Otherwise, it is a NULL pointer dereference.

Thanks, fixed.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 13/22] device-dax: use the dev_pagemap internal refcount
  2019-06-14  0:22   ` Ira Weiny
@ 2019-06-14  6:35     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:35 UTC (permalink / raw)
  To: Ira Weiny
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs, linux-nvdimm, linux-pci,
	linux-kernel, dri-devel, linux-mm, nouveau

On Thu, Jun 13, 2019 at 05:22:17PM -0700, Ira Weiny wrote:
> > -	dev_dax->pgmap.ref = &dev_dax->ref;
> 
> I don't think this exactly correct.  pgmap.ref is a pointer to the dev_dax ref
> structure.  Taking it away will cause devm_memremap_pages() to fail AFAICS.
> 
> I think you need to change struct dev_pagemap as well:

Take a look at the previous patch, which adds an internal_ref field
to dev_pagemap that ->ref is set up to point to if not otherwise
initialized.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 17/22] mm: remove hmm_devmem_add
  2019-06-13 19:42   ` Jason Gunthorpe
@ 2019-06-14  6:39     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:39 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 07:42:43PM +0000, Jason Gunthorpe wrote:
> On Thu, Jun 13, 2019 at 11:43:20AM +0200, Christoph Hellwig wrote:
> > There isn't really much value add in the hmm_devmem_add wrapper.  Just
> > factor out a little helper to find the resource, and otherwise let the
> > driver implement the dev_pagemap_ops directly.
> 
> Was this commit message written when other patches were squashed in
> here? I think the helper this mentions was from an earlier patch

Yes, it was written before a lot of bits were split out.  I've updated
it for the next version.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-14  0:43         ` Ira Weiny
  2019-06-14  1:23           ` John Hubbard
@ 2019-06-14  6:43           ` Christoph Hellwig
  1 sibling, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:43 UTC (permalink / raw)
  To: Ira Weiny
  Cc: Jason Gunthorpe, Ralph Campbell, linux-nvdimm, nouveau,
	linux-kernel, dri-devel, linux-mm, Jérôme Glisse,
	Ben Skeggs, linux-pci, Christoph Hellwig

On Thu, Jun 13, 2019 at 05:43:15PM -0700, Ira Weiny wrote:
> <sigh>  yes but the earlier patch:
> 
> [PATCH 03/22] mm: remove hmm_devmem_add_resource
> 
> Removes the only place type is set to MEMORY_DEVICE_PUBLIC.
> 
> So I think it is ok.  Frankly I was wondering if we should remove the public
> type altogether but conceptually it seems ok.  But I don't see any users of it
> so...  should we get rid of it in the code rather than turning the config off?

That was my original idea.  But then again Jerome spent a lot of effort
putting hooks for it all over the mm and it would seem a little root
to just rip this out ASAP.  I'll give it some more time, but it it doesn't
get used after a few more kernel releases we should nuke it.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 21/22] mm: remove the HMM config option
  2019-06-13 20:01   ` Jason Gunthorpe
@ 2019-06-14  6:47     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:47 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu, Jun 13, 2019 at 08:01:55PM +0000, Jason Gunthorpe wrote:
> On Thu, Jun 13, 2019 at 11:43:24AM +0200, Christoph Hellwig wrote:
> > All the mm/hmm.c code is better keyed off HMM_MIRROR.  Also let nouveau
> > depend on it instead of the mix of a dummy dependency symbol plus the
> > actually selected one.  Drop various odd dependencies, as the code is
> > pretty portable.
> 
> I don't really know, but I thought this needed the arch restriction
> for the same reason get_user_pages has various unique arch specific
> implementations (it does seem to have some open coded GUP like thing)?
> 
> I was hoping we could do this after your common gup series? But sooner
> is better too.

Ok, I've added the arch and 64-bit dependency back in for now.  It does
not look proper to me, and is certainly underdocumented, but the whole
pagetable walking code will need a lot of love eventually anyway, and
the Kconfig stuff for it can be done properly then.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [Nouveau] [PATCH 22/22] mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR
  2019-06-14  1:53   ` [Nouveau] " John Hubbard
@ 2019-06-14  6:48     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-14  6:48 UTC (permalink / raw)
  To: John Hubbard
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs, linux-nvdimm, linux-pci,
	linux-kernel, dri-devel, linux-mm, nouveau

On Thu, Jun 13, 2019 at 06:53:15PM -0700, John Hubbard wrote:
> For those who have out of tree drivers that need migrate_vma(), but are not
> Nouveau, could we pretty please allow a way to select that independently?

No.  The whole point is to not build this fairly big chunk of code in
unless we have a user in the tree.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-14  6:13   ` Christoph Hellwig
@ 2019-06-15  1:14     ` Dan Williams
  2019-06-15  8:33       ` Christoph Hellwig
  0 siblings, 1 reply; 103+ messages in thread
From: Dan Williams @ 2019-06-15  1:14 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jérôme Glisse, Jason Gunthorpe, Ben Skeggs, Linux MM,
	nouveau, Maling list - DRI developers, linux-nvdimm, linux-pci,
	Linux Kernel Mailing List, Andrew Morton

On Thu, Jun 13, 2019 at 11:14 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Thu, Jun 13, 2019 at 11:27:39AM -0700, Dan Williams wrote:
> > It also turns out the nvdimm unit tests crash with this signature on
> > that branch where base v5.2-rc3 passes:
>
> How do you run that test?

This is the unit test suite that gets kicked off by running "make
check" from the ndctl source repository. In this case it requires the
nfit_test set of modules to create a fake nvdimm environment.

The setup instructions are in the README, but feel free to send me
branches and I can kick off a test. One of these we'll get around to
making it automated for patch submissions to the linux-nvdimm mailing
list.

https://github.com/pmem/ndctl/blob/master/README.md

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper
  2019-06-13  9:43 ` [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper Christoph Hellwig
  2019-06-13 19:16   ` Jason Gunthorpe
@ 2019-06-15  2:21   ` John Hubbard
  2019-06-15 14:30     ` Christoph Hellwig
  1 sibling, 1 reply; 103+ messages in thread
From: John Hubbard @ 2019-06-15  2:21 UTC (permalink / raw)
  To: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs
  Cc: linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci, linux-kernel

On 6/13/19 2:43 AM, Christoph Hellwig wrote:
> Keep the physical address allocation that hmm_add_device does with the
> rest of the resource code, and allow future reuse of it without the hmm
> wrapper.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  include/linux/ioport.h |  2 ++
>  kernel/resource.c      | 39 +++++++++++++++++++++++++++++++++++++++
>  mm/hmm.c               | 33 ++++-----------------------------
>  3 files changed, 45 insertions(+), 29 deletions(-)

Some trivial typos noted below, but this accurately moves the code
into a helper routine, looks good.

Reviewed-by: John Hubbard <jhubbard@nvidia.com> 


> 
> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
> index da0ebaec25f0..76a33ae3bf6c 100644
> --- a/include/linux/ioport.h
> +++ b/include/linux/ioport.h
> @@ -286,6 +286,8 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
>         return (r1->start <= r2->end && r1->end >= r2->start);
>  }
>  
> +struct resource *devm_request_free_mem_region(struct device *dev,
> +		struct resource *base, unsigned long size);
>  
>  #endif /* __ASSEMBLY__ */
>  #endif	/* _LINUX_IOPORT_H */
> diff --git a/kernel/resource.c b/kernel/resource.c
> index 158f04ec1d4f..99c58134ed1c 100644
> --- a/kernel/resource.c
> +++ b/kernel/resource.c
> @@ -1628,6 +1628,45 @@ void resource_list_free(struct list_head *head)
>  }
>  EXPORT_SYMBOL(resource_list_free);
>  
> +#ifdef CONFIG_DEVICE_PRIVATE
> +/**
> + * devm_request_free_mem_region - find free region for device private memory
> + *
> + * @dev: device struct to bind the resource too

                                               "to"

> + * @size: size in bytes of the device memory to add
> + * @base: resource tree to look in
> + *
> + * This function tries to find an empty range of physical address big enough to
> + * contain the new resource, so that it can later be hotpluged as ZONE_DEVICE

                                                        "hotplugged"

> + * memory, which in turn allocates struct pages.
> + */
> +struct resource *devm_request_free_mem_region(struct device *dev,
> +		struct resource *base, unsigned long size)
> +{
> +	resource_size_t end, addr;
> +	struct resource *res;
> +
> +	size = ALIGN(size, 1UL << PA_SECTION_SHIFT);
> +	end = min_t(unsigned long, base->end, (1UL << MAX_PHYSMEM_BITS) - 1);
> +	addr = end - size + 1UL;
> +
> +	for (; addr > size && addr >= base->start; addr -= size) {
> +		if (region_intersects(addr, size, 0, IORES_DESC_NONE) !=
> +				REGION_DISJOINT)
> +			continue;
> +
> +		res = devm_request_mem_region(dev, addr, size, dev_name(dev));
> +		if (!res)
> +			return ERR_PTR(-ENOMEM);
> +		res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
> +		return res;
> +	}
> +
> +	return ERR_PTR(-ERANGE);
> +}
> +EXPORT_SYMBOL_GPL(devm_request_free_mem_region);
> +#endif /* CONFIG_DEVICE_PRIVATE */
> +
>  static int __init strict_iomem(char *str)
>  {
>  	if (strstr(str, "relaxed"))
> diff --git a/mm/hmm.c b/mm/hmm.c
> index e1dc98407e7b..13a16faf0a77 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -26,8 +26,6 @@
>  #include <linux/mmu_notifier.h>
>  #include <linux/memory_hotplug.h>
>  
> -#define PA_SECTION_SIZE (1UL << PA_SECTION_SHIFT)
> -
>  #if IS_ENABLED(CONFIG_HMM_MIRROR)
>  static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
>  
> @@ -1372,7 +1370,6 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
>  				  unsigned long size)
>  {
>  	struct hmm_devmem *devmem;
> -	resource_size_t addr;
>  	void *result;
>  	int ret;
>  
> @@ -1398,32 +1395,10 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
>  	if (ret)
>  		return ERR_PTR(ret);
>  
> -	size = ALIGN(size, PA_SECTION_SIZE);
> -	addr = min((unsigned long)iomem_resource.end,
> -		   (1UL << MAX_PHYSMEM_BITS) - 1);
> -	addr = addr - size + 1UL;
> -
> -	/*
> -	 * FIXME add a new helper to quickly walk resource tree and find free
> -	 * range
> -	 *
> -	 * FIXME what about ioport_resource resource ?
> -	 */
> -	for (; addr > size && addr >= iomem_resource.start; addr -= size) {
> -		ret = region_intersects(addr, size, 0, IORES_DESC_NONE);
> -		if (ret != REGION_DISJOINT)
> -			continue;
> -
> -		devmem->resource = devm_request_mem_region(device, addr, size,
> -							   dev_name(device));
> -		if (!devmem->resource)
> -			return ERR_PTR(-ENOMEM);
> -		break;
> -	}
> -	if (!devmem->resource)
> -		return ERR_PTR(-ERANGE);
> -
> -	devmem->resource->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
> +	devmem->resource = devm_request_free_mem_region(device, &iomem_resource,
> +			size);
> +	if (IS_ERR(devmem->resource))
> +		return ERR_CAST(devmem->resource);
>  	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
>  	devmem->pfn_last = devmem->pfn_first +
>  			   (resource_size(devmem->resource) >> PAGE_SHIFT);
> 


thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-15  1:14     ` Dan Williams
@ 2019-06-15  8:33       ` Christoph Hellwig
  2019-06-15 18:09         ` Dan Williams
  0 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-15  8:33 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List,
	Andrew Morton

On Fri, Jun 14, 2019 at 06:14:45PM -0700, Dan Williams wrote:
> On Thu, Jun 13, 2019 at 11:14 PM Christoph Hellwig <hch@lst.de> wrote:
> >
> > On Thu, Jun 13, 2019 at 11:27:39AM -0700, Dan Williams wrote:
> > > It also turns out the nvdimm unit tests crash with this signature on
> > > that branch where base v5.2-rc3 passes:
> >
> > How do you run that test?
> 
> This is the unit test suite that gets kicked off by running "make
> check" from the ndctl source repository. In this case it requires the
> nfit_test set of modules to create a fake nvdimm environment.
> 
> The setup instructions are in the README, but feel free to send me
> branches and I can kick off a test. One of these we'll get around to
> making it automated for patch submissions to the linux-nvdimm mailing
> list.

Oh, now I remember, and that was the bummer as anything requiring modules
just does not fit at all into my normal test flows that just inject
kernel images and use otherwise static images.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper
  2019-06-15  2:21   ` John Hubbard
@ 2019-06-15 14:30     ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-15 14:30 UTC (permalink / raw)
  To: John Hubbard
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs, linux-mm, nouveau, dri-devel,
	linux-nvdimm, linux-pci, linux-kernel

On Fri, Jun 14, 2019 at 07:21:54PM -0700, John Hubbard wrote:
> On 6/13/19 2:43 AM, Christoph Hellwig wrote:
> > Keep the physical address allocation that hmm_add_device does with the
> > rest of the resource code, and allow future reuse of it without the hmm
> > wrapper.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  include/linux/ioport.h |  2 ++
> >  kernel/resource.c      | 39 +++++++++++++++++++++++++++++++++++++++
> >  mm/hmm.c               | 33 ++++-----------------------------
> >  3 files changed, 45 insertions(+), 29 deletions(-)
> 
> Some trivial typos noted below, but this accurately moves the code
> into a helper routine, looks good.

Thanks for the typo spotting.  These two actually were copy and pasted
from the original hmm code, but I'll gladly fix them for the next
iteration.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: dev_pagemap related cleanups
  2019-06-15  8:33       ` Christoph Hellwig
@ 2019-06-15 18:09         ` Dan Williams
  0 siblings, 0 replies; 103+ messages in thread
From: Dan Williams @ 2019-06-15 18:09 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jérôme Glisse, Jason Gunthorpe, Ben Skeggs, Linux MM,
	nouveau, Maling list - DRI developers, linux-nvdimm, linux-pci,
	Linux Kernel Mailing List, Andrew Morton

On Sat, Jun 15, 2019 at 1:34 AM Christoph Hellwig <hch@lst.de> wrote:
>
> On Fri, Jun 14, 2019 at 06:14:45PM -0700, Dan Williams wrote:
> > On Thu, Jun 13, 2019 at 11:14 PM Christoph Hellwig <hch@lst.de> wrote:
> > >
> > > On Thu, Jun 13, 2019 at 11:27:39AM -0700, Dan Williams wrote:
> > > > It also turns out the nvdimm unit tests crash with this signature on
> > > > that branch where base v5.2-rc3 passes:
> > >
> > > How do you run that test?
> >
> > This is the unit test suite that gets kicked off by running "make
> > check" from the ndctl source repository. In this case it requires the
> > nfit_test set of modules to create a fake nvdimm environment.
> >
> > The setup instructions are in the README, but feel free to send me
> > branches and I can kick off a test. One of these we'll get around to
> > making it automated for patch submissions to the linux-nvdimm mailing
> > list.
>
> Oh, now I remember, and that was the bummer as anything requiring modules
> just does not fit at all into my normal test flows that just inject
> kernel images and use otherwise static images.

Yeah... although we do have some changes being proposed from non-x86
devs to allow a subset of the tests to run without the nfit_test
modules: https://patchwork.kernel.org/patch/10980779/

...so this prompts me to go review that patch.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-14  1:23           ` John Hubbard
@ 2019-06-19 19:27             ` Jason Gunthorpe
  2019-06-19 19:46               ` Dan Williams
  2019-06-26  3:15               ` John Hubbard
  0 siblings, 2 replies; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-19 19:27 UTC (permalink / raw)
  To: John Hubbard
  Cc: Ira Weiny, Ralph Campbell, linux-nvdimm, nouveau, linux-kernel,
	dri-devel, linux-mm, Jérôme Glisse, Ben Skeggs,
	linux-pci, Christoph Hellwig

On Thu, Jun 13, 2019 at 06:23:04PM -0700, John Hubbard wrote:
> On 6/13/19 5:43 PM, Ira Weiny wrote:
> > On Thu, Jun 13, 2019 at 07:58:29PM +0000, Jason Gunthorpe wrote:
> >> On Thu, Jun 13, 2019 at 12:53:02PM -0700, Ralph Campbell wrote:
> >>>
> ...
> >> Hum, so the only thing this config does is short circuit here:
> >>
> >> static inline bool is_device_public_page(const struct page *page)
> >> {
> >>         return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
> >>                 IS_ENABLED(CONFIG_DEVICE_PUBLIC) &&
> >>                 is_zone_device_page(page) &&
> >>                 page->pgmap->type == MEMORY_DEVICE_PUBLIC;
> >> }
> >>
> >> Which is called all over the place.. 
> > 
> > <sigh>  yes but the earlier patch:
> > 
> > [PATCH 03/22] mm: remove hmm_devmem_add_resource
> > 
> > Removes the only place type is set to MEMORY_DEVICE_PUBLIC.
> > 
> > So I think it is ok.  Frankly I was wondering if we should remove the public
> > type altogether but conceptually it seems ok.  But I don't see any users of it
> > so...  should we get rid of it in the code rather than turning the config off?
> > 
> > Ira
> 
> That seems reasonable. I recall that the hope was for those IBM Power 9
> systems to use _PUBLIC, as they have hardware-based coherent device (GPU)
> memory, and so the memory really is visible to the CPU. And the IBM team
> was thinking of taking advantage of it. But I haven't seen anything on
> that front for a while.

Does anyone know who those people are and can we encourage them to
send some patches? :)

Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-19 19:27             ` Jason Gunthorpe
@ 2019-06-19 19:46               ` Dan Williams
  2019-06-26  3:15               ` John Hubbard
  1 sibling, 0 replies; 103+ messages in thread
From: Dan Williams @ 2019-06-19 19:46 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: John Hubbard, Ralph Campbell, linux-nvdimm, nouveau,
	linux-kernel, dri-devel, linux-mm, Jérôme Glisse,
	Ben Skeggs, linux-pci, Christoph Hellwig

On Wed, Jun 19, 2019 at 12:42 PM Jason Gunthorpe <jgg@mellanox.com> wrote:
>
> On Thu, Jun 13, 2019 at 06:23:04PM -0700, John Hubbard wrote:
> > On 6/13/19 5:43 PM, Ira Weiny wrote:
> > > On Thu, Jun 13, 2019 at 07:58:29PM +0000, Jason Gunthorpe wrote:
> > >> On Thu, Jun 13, 2019 at 12:53:02PM -0700, Ralph Campbell wrote:
> > >>>
> > ...
> > >> Hum, so the only thing this config does is short circuit here:
> > >>
> > >> static inline bool is_device_public_page(const struct page *page)
> > >> {
> > >>         return IS_ENABLED(CONFIG_DEV_PAGEMAP_OPS) &&
> > >>                 IS_ENABLED(CONFIG_DEVICE_PUBLIC) &&
> > >>                 is_zone_device_page(page) &&
> > >>                 page->pgmap->type == MEMORY_DEVICE_PUBLIC;
> > >> }
> > >>
> > >> Which is called all over the place..
> > >
> > > <sigh>  yes but the earlier patch:
> > >
> > > [PATCH 03/22] mm: remove hmm_devmem_add_resource
> > >
> > > Removes the only place type is set to MEMORY_DEVICE_PUBLIC.
> > >
> > > So I think it is ok.  Frankly I was wondering if we should remove the public
> > > type altogether but conceptually it seems ok.  But I don't see any users of it
> > > so...  should we get rid of it in the code rather than turning the config off?
> > >
> > > Ira
> >
> > That seems reasonable. I recall that the hope was for those IBM Power 9
> > systems to use _PUBLIC, as they have hardware-based coherent device (GPU)
> > memory, and so the memory really is visible to the CPU. And the IBM team
> > was thinking of taking advantage of it. But I haven't seen anything on
> > that front for a while.
>
> Does anyone know who those people are and can we encourage them to
> send some patches? :)

I expect marking it CONFIG_BROKEN with the threat of deleting it if no
patches show up *is* the encouragement.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-13  9:43 ` [PATCH 05/22] mm: export alloc_pages_vma Christoph Hellwig
  2019-06-14  1:47   ` John Hubbard
@ 2019-06-20 19:17   ` Michal Hocko
  2019-06-24 18:24     ` Dan Williams
  1 sibling, 1 reply; 103+ messages in thread
From: Michal Hocko @ 2019-06-20 19:17 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu 13-06-19 11:43:08, Christoph Hellwig wrote:
> noveau is currently using this through an odd hmm wrapper, and I plan
> to switch it to the real thing later in this series.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  mm/mempolicy.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 01600d80ae01..f9023b5fba37 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -2098,6 +2098,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  out:
>  	return page;
>  }
> +EXPORT_SYMBOL_GPL(alloc_pages_vma);

All allocator exported symbols are EXPORT_SYMBOL, what is a reason to
have this one special?

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-13  9:43 ` [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken Christoph Hellwig
  2019-06-13 19:44   ` Jason Gunthorpe
@ 2019-06-20 19:26   ` Michal Hocko
  2019-06-25  7:29     ` Christoph Hellwig
  1 sibling, 1 reply; 103+ messages in thread
From: Michal Hocko @ 2019-06-20 19:26 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu 13-06-19 11:43:21, Christoph Hellwig wrote:
> The code hasn't been used since it was added to the tree, and doesn't
> appear to actually be usable.  Mark it as BROKEN until either a user
> comes along or we finally give up on it.

I would go even further and simply remove all the DEVICE_PUBLIC code.

> Signed-off-by: Christoph Hellwig <hch@lst.de>

Anyway
Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  mm/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 0d2ba7e1f43e..406fa45e9ecc 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -721,6 +721,7 @@ config DEVICE_PRIVATE
>  config DEVICE_PUBLIC
>  	bool "Addressable device memory (like GPU memory)"
>  	depends on ARCH_HAS_HMM
> +	depends on BROKEN
>  	select HMM
>  	select DEV_PAGEMAP_OPS
>  
> -- 
> 2.20.1

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 03/22] mm: remove hmm_devmem_add_resource
  2019-06-13  9:43 ` [PATCH 03/22] mm: remove hmm_devmem_add_resource Christoph Hellwig
  2019-06-13 18:52   ` Jason Gunthorpe
  2019-06-14  0:54   ` [Nouveau] " John Hubbard
@ 2019-06-20 19:32   ` Michal Hocko
  2 siblings, 0 replies; 103+ messages in thread
From: Michal Hocko @ 2019-06-20 19:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu 13-06-19 11:43:06, Christoph Hellwig wrote:
> This function has never been used since it was first added to the kernel
> more than a year and a half ago, and if we ever grow a consumer of the
> MEMORY_DEVICE_PUBLIC infrastructure it can easily use devm_memremap_pages
> directly now that we've simplified the API for it.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  include/linux/hmm.h |  3 ---
>  mm/hmm.c            | 54 ---------------------------------------------
>  2 files changed, 57 deletions(-)
> 
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 4867b9da1b6c..5761a39221a6 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -688,9 +688,6 @@ struct hmm_devmem {
>  struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
>  				  struct device *device,
>  				  unsigned long size);
> -struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
> -					   struct device *device,
> -					   struct resource *res);
>  
>  /*
>   * hmm_devmem_page_set_drvdata - set per-page driver data field
> diff --git a/mm/hmm.c b/mm/hmm.c
> index ff2598eb7377..0c62426d1257 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -1445,58 +1445,4 @@ struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
>  	return devmem;
>  }
>  EXPORT_SYMBOL_GPL(hmm_devmem_add);
> -
> -struct hmm_devmem *hmm_devmem_add_resource(const struct hmm_devmem_ops *ops,
> -					   struct device *device,
> -					   struct resource *res)
> -{
> -	struct hmm_devmem *devmem;
> -	void *result;
> -	int ret;
> -
> -	if (res->desc != IORES_DESC_DEVICE_PUBLIC_MEMORY)
> -		return ERR_PTR(-EINVAL);
> -
> -	dev_pagemap_get_ops();
> -
> -	devmem = devm_kzalloc(device, sizeof(*devmem), GFP_KERNEL);
> -	if (!devmem)
> -		return ERR_PTR(-ENOMEM);
> -
> -	init_completion(&devmem->completion);
> -	devmem->pfn_first = -1UL;
> -	devmem->pfn_last = -1UL;
> -	devmem->resource = res;
> -	devmem->device = device;
> -	devmem->ops = ops;
> -
> -	ret = percpu_ref_init(&devmem->ref, &hmm_devmem_ref_release,
> -			      0, GFP_KERNEL);
> -	if (ret)
> -		return ERR_PTR(ret);
> -
> -	ret = devm_add_action_or_reset(device, hmm_devmem_ref_exit,
> -			&devmem->ref);
> -	if (ret)
> -		return ERR_PTR(ret);
> -
> -	devmem->pfn_first = devmem->resource->start >> PAGE_SHIFT;
> -	devmem->pfn_last = devmem->pfn_first +
> -			   (resource_size(devmem->resource) >> PAGE_SHIFT);
> -	devmem->page_fault = hmm_devmem_fault;
> -
> -	devmem->pagemap.type = MEMORY_DEVICE_PUBLIC;
> -	devmem->pagemap.res = *devmem->resource;
> -	devmem->pagemap.page_free = hmm_devmem_free;
> -	devmem->pagemap.altmap_valid = false;
> -	devmem->pagemap.ref = &devmem->ref;
> -	devmem->pagemap.data = devmem;
> -	devmem->pagemap.kill = hmm_devmem_ref_kill;
> -
> -	result = devm_memremap_pages(devmem->device, &devmem->pagemap);
> -	if (IS_ERR(result))
> -		return result;
> -	return devmem;
> -}
> -EXPORT_SYMBOL_GPL(hmm_devmem_add_resource);
>  #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
> -- 
> 2.20.1

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free
  2019-06-13  9:43 ` [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free Christoph Hellwig
  2019-06-13 19:05   ` Jason Gunthorpe
  2019-06-14  1:46   ` John Hubbard
@ 2019-06-20 19:36   ` Michal Hocko
  2 siblings, 0 replies; 103+ messages in thread
From: Michal Hocko @ 2019-06-20 19:36 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, linux-mm, nouveau, dri-devel, linux-nvdimm,
	linux-pci, linux-kernel

On Thu 13-06-19 11:43:07, Christoph Hellwig wrote:
> ->mapping isn't even used by HMM users, and the field at the same offset
> in the zone_device part of the union is declared as pad.  (Which btw is
> rather confusing, as DAX uses ->pgmap and ->mapping from two different
> sides of the union, but DAX doesn't use hmm_devmem_free).
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

I cannot really judge here but setting mapping here without any comment
is quite confusing. So if this is safe to remove then it is certainly an
improvement.

> ---
>  mm/hmm.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 0c62426d1257..e1dc98407e7b 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -1347,8 +1347,6 @@ static void hmm_devmem_free(struct page *page, void *data)
>  {
>  	struct hmm_devmem *devmem = data;
>  
> -	page->mapping = NULL;
> -
>  	devmem->ops->free(devmem, page);
>  }
>  
> -- 
> 2.20.1

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-20 19:17   ` Michal Hocko
@ 2019-06-24 18:24     ` Dan Williams
  2019-06-25  7:23       ` Christoph Hellwig
  0 siblings, 1 reply; 103+ messages in thread
From: Dan Williams @ 2019-06-24 18:24 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List

On Thu, Jun 20, 2019 at 12:17 PM Michal Hocko <mhocko@kernel.org> wrote:
>
> On Thu 13-06-19 11:43:08, Christoph Hellwig wrote:
> > noveau is currently using this through an odd hmm wrapper, and I plan
> > to switch it to the real thing later in this series.
> >
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  mm/mempolicy.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > index 01600d80ae01..f9023b5fba37 100644
> > --- a/mm/mempolicy.c
> > +++ b/mm/mempolicy.c
> > @@ -2098,6 +2098,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
> >  out:
> >       return page;
> >  }
> > +EXPORT_SYMBOL_GPL(alloc_pages_vma);
>
> All allocator exported symbols are EXPORT_SYMBOL, what is a reason to
> have this one special?

I asked for this simply because it was not exported historically. In
general I want to establish explicit export-type criteria so the
community can spend less time debating when to use EXPORT_SYMBOL_GPL
[1].

The thought in this instance is that it is not historically exported
to modules and it is safer from a maintenance perspective to start
with GPL-only for new symbols in case we don't want to maintain that
interface long-term for out-of-tree modules.

Yes, we always reserve the right to remove / change interfaces
regardless of the export type, but history has shown that external
pressure to keep an interface stable (contrary to
Documentation/process/stable-api-nonsense.rst) tends to be less for
GPL-only exports.

[1]: https://lists.linuxfoundation.org/pipermail/ksummit-discuss/2018-September/005688.html

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-24 18:24     ` Dan Williams
@ 2019-06-25  7:23       ` Christoph Hellwig
  2019-06-25 15:00         ` Michal Hocko
  0 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-25  7:23 UTC (permalink / raw)
  To: Dan Williams
  Cc: Michal Hocko, Christoph Hellwig, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs, Linux MM, nouveau,
	Maling list - DRI developers, linux-nvdimm, linux-pci,
	Linux Kernel Mailing List

On Mon, Jun 24, 2019 at 11:24:48AM -0700, Dan Williams wrote:
> I asked for this simply because it was not exported historically. In
> general I want to establish explicit export-type criteria so the
> community can spend less time debating when to use EXPORT_SYMBOL_GPL
> [1].
> 
> The thought in this instance is that it is not historically exported
> to modules and it is safer from a maintenance perspective to start
> with GPL-only for new symbols in case we don't want to maintain that
> interface long-term for out-of-tree modules.
> 
> Yes, we always reserve the right to remove / change interfaces
> regardless of the export type, but history has shown that external
> pressure to keep an interface stable (contrary to
> Documentation/process/stable-api-nonsense.rst) tends to be less for
> GPL-only exports.

Fully agreed.  In the end the decision is with the MM maintainers,
though, although I'd prefer to keep it as in this series.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-20 19:26   ` Michal Hocko
@ 2019-06-25  7:29     ` Christoph Hellwig
  2019-06-25 11:44       ` Jason Gunthorpe
  0 siblings, 1 reply; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-25  7:29 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Christoph Hellwig, Dan Williams, Jérôme Glisse,
	Jason Gunthorpe, Ben Skeggs, linux-mm, nouveau, dri-devel,
	linux-nvdimm, linux-pci, linux-kernel

On Thu, Jun 20, 2019 at 09:26:48PM +0200, Michal Hocko wrote:
> On Thu 13-06-19 11:43:21, Christoph Hellwig wrote:
> > The code hasn't been used since it was added to the tree, and doesn't
> > appear to actually be usable.  Mark it as BROKEN until either a user
> > comes along or we finally give up on it.
> 
> I would go even further and simply remove all the DEVICE_PUBLIC code.

I looked into that as I now got the feedback twice.  It would
create a conflict with another tree cleaning things up around the
is_device_private defintion, but otherwise I'd be glad to just remove
it.

Jason, as this goes through your tree, do you mind the additional
conflict?

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-25  7:29     ` Christoph Hellwig
@ 2019-06-25 11:44       ` Jason Gunthorpe
  2019-06-25 11:59         ` Christoph Hellwig
  0 siblings, 1 reply; 103+ messages in thread
From: Jason Gunthorpe @ 2019-06-25 11:44 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Michal Hocko, Dan Williams, Jérôme Glisse, Ben Skeggs,
	linux-mm, nouveau, dri-devel, linux-nvdimm, linux-pci,
	linux-kernel

On Tue, Jun 25, 2019 at 09:29:15AM +0200, Christoph Hellwig wrote:
> On Thu, Jun 20, 2019 at 09:26:48PM +0200, Michal Hocko wrote:
> > On Thu 13-06-19 11:43:21, Christoph Hellwig wrote:
> > > The code hasn't been used since it was added to the tree, and doesn't
> > > appear to actually be usable.  Mark it as BROKEN until either a user
> > > comes along or we finally give up on it.
> > 
> > I would go even further and simply remove all the DEVICE_PUBLIC code.
> 
> I looked into that as I now got the feedback twice.  It would
> create a conflict with another tree cleaning things up around the
> is_device_private defintion, but otherwise I'd be glad to just remove
> it.
> 
> Jason, as this goes through your tree, do you mind the additional
> conflict?

Which tree and what does the resolution look like?

Also, I don't want to be making the decision if we should keep/remove
DEVICE_PUBLIC, so let's get an Ack from Andrew/etc?

My main reluctance is that I know there is HW out there that can do
coherent, and I want to believe they are coming with patches, just
too slowly. But I'd also rather those people defend themselves :P

Thanks,
Jason

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-25 11:44       ` Jason Gunthorpe
@ 2019-06-25 11:59         ` Christoph Hellwig
  0 siblings, 0 replies; 103+ messages in thread
From: Christoph Hellwig @ 2019-06-25 11:59 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Christoph Hellwig, Michal Hocko, Dan Williams,
	Jérôme Glisse, Ben Skeggs, linux-mm, nouveau,
	dri-devel, linux-nvdimm, linux-pci, linux-kernel

On Tue, Jun 25, 2019 at 11:44:28AM +0000, Jason Gunthorpe wrote:
> Which tree and what does the resolution look like?

Looks like in -mm.  The current commit in linux-next is:

commit 0d23b042f26955fb35721817beb98ba7f1d9ed9f
Author: Robin Murphy <robin.murphy@arm.com>
Date:   Fri Jun 14 10:42:14 2019 +1000

    mm: clean up is_device_*_page() definitions


> Also, I don't want to be making the decision if we should keep/remove
> DEVICE_PUBLIC, so let's get an Ack from Andrew/etc?
> 
> My main reluctance is that I know there is HW out there that can do
> coherent, and I want to believe they are coming with patches, just
> too slowly. But I'd also rather those people defend themselves :P

Lets keep everything as-is for now.  I'm pretty certain nothing
will show up, but letting this linger another release or two
shouldn't be much of a problem.  And if we urgently feel like removing
it we can do it after -rc1.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-25  7:23       ` Christoph Hellwig
@ 2019-06-25 15:00         ` Michal Hocko
  2019-06-25 18:03           ` Dan Williams
  0 siblings, 1 reply; 103+ messages in thread
From: Michal Hocko @ 2019-06-25 15:00 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Dan Williams, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List

On Tue 25-06-19 09:23:17, Christoph Hellwig wrote:
> On Mon, Jun 24, 2019 at 11:24:48AM -0700, Dan Williams wrote:
> > I asked for this simply because it was not exported historically. In
> > general I want to establish explicit export-type criteria so the
> > community can spend less time debating when to use EXPORT_SYMBOL_GPL
> > [1].
> > 
> > The thought in this instance is that it is not historically exported
> > to modules and it is safer from a maintenance perspective to start
> > with GPL-only for new symbols in case we don't want to maintain that
> > interface long-term for out-of-tree modules.
> > 
> > Yes, we always reserve the right to remove / change interfaces
> > regardless of the export type, but history has shown that external
> > pressure to keep an interface stable (contrary to
> > Documentation/process/stable-api-nonsense.rst) tends to be less for
> > GPL-only exports.
> 
> Fully agreed.  In the end the decision is with the MM maintainers,
> though, although I'd prefer to keep it as in this series.

I am sorry but I am not really convinced by the above reasoning wrt. to
the allocator API and it has been a subject of many changes over time. I
do not remember a single case where we would be bending the allocator
API because of external modules and I am pretty sure we will push back
heavily if that was the case in the future.

So in this particular case I would go with consistency and export the
same way we do with other functions. Also we do not want people to
reinvent this API and screw that like we have seen in other cases when
external modules try reimplement core functionality themselves.

Thanks!
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-25 15:00         ` Michal Hocko
@ 2019-06-25 18:03           ` Dan Williams
  2019-06-25 19:00             ` Michal Hocko
  0 siblings, 1 reply; 103+ messages in thread
From: Dan Williams @ 2019-06-25 18:03 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List

On Tue, Jun 25, 2019 at 8:01 AM Michal Hocko <mhocko@kernel.org> wrote:
>
> On Tue 25-06-19 09:23:17, Christoph Hellwig wrote:
> > On Mon, Jun 24, 2019 at 11:24:48AM -0700, Dan Williams wrote:
> > > I asked for this simply because it was not exported historically. In
> > > general I want to establish explicit export-type criteria so the
> > > community can spend less time debating when to use EXPORT_SYMBOL_GPL
> > > [1].
> > >
> > > The thought in this instance is that it is not historically exported
> > > to modules and it is safer from a maintenance perspective to start
> > > with GPL-only for new symbols in case we don't want to maintain that
> > > interface long-term for out-of-tree modules.
> > >
> > > Yes, we always reserve the right to remove / change interfaces
> > > regardless of the export type, but history has shown that external
> > > pressure to keep an interface stable (contrary to
> > > Documentation/process/stable-api-nonsense.rst) tends to be less for
> > > GPL-only exports.
> >
> > Fully agreed.  In the end the decision is with the MM maintainers,
> > though, although I'd prefer to keep it as in this series.
>
> I am sorry but I am not really convinced by the above reasoning wrt. to
> the allocator API and it has been a subject of many changes over time. I
> do not remember a single case where we would be bending the allocator
> API because of external modules and I am pretty sure we will push back
> heavily if that was the case in the future.

This seems to say that you have no direct experience of dealing with
changing symbols that that a prominent out-of-tree module needs? GPU
drivers and the core-mm are on a path to increase their cooperation on
memory management mechanisms over time, and symbol export changes for
out-of-tree GPU drivers have been a significant source of friction in
the past.

> So in this particular case I would go with consistency and export the
> same way we do with other functions. Also we do not want people to
> reinvent this API and screw that like we have seen in other cases when
> external modules try reimplement core functionality themselves.

Consistency is a weak argument when the cost to the upstream community
is negligible. If the same functionality was available via another /
already exported interface *that* would be an argument to maintain the
existing export policy. "Consistency" in and of itself is not a
precedent we can use more widely in default export-type decisions.

Effectively I'm arguing EXPORT_SYMBOL_GPL by default with a later
decision to drop the _GPL. Similar to how we are careful to mark sysfs
interfaces in Documentation/ABI/ that we are not fully committed to
maintaining over time, or are otherwise so new that there is not yet a
good read on whether they can be made permanent.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-25 18:03           ` Dan Williams
@ 2019-06-25 19:00             ` Michal Hocko
  2019-06-25 19:52               ` Dan Williams
  0 siblings, 1 reply; 103+ messages in thread
From: Michal Hocko @ 2019-06-25 19:00 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List

On Tue 25-06-19 11:03:53, Dan Williams wrote:
> On Tue, Jun 25, 2019 at 8:01 AM Michal Hocko <mhocko@kernel.org> wrote:
> >
> > On Tue 25-06-19 09:23:17, Christoph Hellwig wrote:
> > > On Mon, Jun 24, 2019 at 11:24:48AM -0700, Dan Williams wrote:
> > > > I asked for this simply because it was not exported historically. In
> > > > general I want to establish explicit export-type criteria so the
> > > > community can spend less time debating when to use EXPORT_SYMBOL_GPL
> > > > [1].
> > > >
> > > > The thought in this instance is that it is not historically exported
> > > > to modules and it is safer from a maintenance perspective to start
> > > > with GPL-only for new symbols in case we don't want to maintain that
> > > > interface long-term for out-of-tree modules.
> > > >
> > > > Yes, we always reserve the right to remove / change interfaces
> > > > regardless of the export type, but history has shown that external
> > > > pressure to keep an interface stable (contrary to
> > > > Documentation/process/stable-api-nonsense.rst) tends to be less for
> > > > GPL-only exports.
> > >
> > > Fully agreed.  In the end the decision is with the MM maintainers,
> > > though, although I'd prefer to keep it as in this series.
> >
> > I am sorry but I am not really convinced by the above reasoning wrt. to
> > the allocator API and it has been a subject of many changes over time. I
> > do not remember a single case where we would be bending the allocator
> > API because of external modules and I am pretty sure we will push back
> > heavily if that was the case in the future.
> 
> This seems to say that you have no direct experience of dealing with
> changing symbols that that a prominent out-of-tree module needs? GPU
> drivers and the core-mm are on a path to increase their cooperation on
> memory management mechanisms over time, and symbol export changes for
> out-of-tree GPU drivers have been a significant source of friction in
> the past.

I have an experience e.g. to rework semantic of some gfp flags and that is
something that users usualy get wrong and never heard that an out of
tree code would insist on an old semantic and pushing us to the corner.

> > So in this particular case I would go with consistency and export the
> > same way we do with other functions. Also we do not want people to
> > reinvent this API and screw that like we have seen in other cases when
> > external modules try reimplement core functionality themselves.
> 
> Consistency is a weak argument when the cost to the upstream community
> is negligible. If the same functionality was available via another /
> already exported interface *that* would be an argument to maintain the
> existing export policy. "Consistency" in and of itself is not a
> precedent we can use more widely in default export-type decisions.
> 
> Effectively I'm arguing EXPORT_SYMBOL_GPL by default with a later
> decision to drop the _GPL. Similar to how we are careful to mark sysfs
> interfaces in Documentation/ABI/ that we are not fully committed to
> maintaining over time, or are otherwise so new that there is not yet a
> good read on whether they can be made permanent.

Documentation/process/stable-api-nonsense.rst
Really. If you want to play with GPL vs. EXPORT_SYMBOL else this is up
to you but I do not see any technical argument to make this particular
interface to the page allocator any different from all others that are
exported to modules.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-25 19:00             ` Michal Hocko
@ 2019-06-25 19:52               ` Dan Williams
  2019-06-26  5:46                 ` Michal Hocko
  0 siblings, 1 reply; 103+ messages in thread
From: Dan Williams @ 2019-06-25 19:52 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List

On Tue, Jun 25, 2019 at 12:01 PM Michal Hocko <mhocko@kernel.org> wrote:
>
> On Tue 25-06-19 11:03:53, Dan Williams wrote:
> > On Tue, Jun 25, 2019 at 8:01 AM Michal Hocko <mhocko@kernel.org> wrote:
> > >
> > > On Tue 25-06-19 09:23:17, Christoph Hellwig wrote:
> > > > On Mon, Jun 24, 2019 at 11:24:48AM -0700, Dan Williams wrote:
> > > > > I asked for this simply because it was not exported historically. In
> > > > > general I want to establish explicit export-type criteria so the
> > > > > community can spend less time debating when to use EXPORT_SYMBOL_GPL
> > > > > [1].
> > > > >
> > > > > The thought in this instance is that it is not historically exported
> > > > > to modules and it is safer from a maintenance perspective to start
> > > > > with GPL-only for new symbols in case we don't want to maintain that
> > > > > interface long-term for out-of-tree modules.
> > > > >
> > > > > Yes, we always reserve the right to remove / change interfaces
> > > > > regardless of the export type, but history has shown that external
> > > > > pressure to keep an interface stable (contrary to
> > > > > Documentation/process/stable-api-nonsense.rst) tends to be less for
> > > > > GPL-only exports.
> > > >
> > > > Fully agreed.  In the end the decision is with the MM maintainers,
> > > > though, although I'd prefer to keep it as in this series.
> > >
> > > I am sorry but I am not really convinced by the above reasoning wrt. to
> > > the allocator API and it has been a subject of many changes over time. I
> > > do not remember a single case where we would be bending the allocator
> > > API because of external modules and I am pretty sure we will push back
> > > heavily if that was the case in the future.
> >
> > This seems to say that you have no direct experience of dealing with
> > changing symbols that that a prominent out-of-tree module needs? GPU
> > drivers and the core-mm are on a path to increase their cooperation on
> > memory management mechanisms over time, and symbol export changes for
> > out-of-tree GPU drivers have been a significant source of friction in
> > the past.
>
> I have an experience e.g. to rework semantic of some gfp flags and that is
> something that users usualy get wrong and never heard that an out of
> tree code would insist on an old semantic and pushing us to the corner.
>
> > > So in this particular case I would go with consistency and export the
> > > same way we do with other functions. Also we do not want people to
> > > reinvent this API and screw that like we have seen in other cases when
> > > external modules try reimplement core functionality themselves.
> >
> > Consistency is a weak argument when the cost to the upstream community
> > is negligible. If the same functionality was available via another /
> > already exported interface *that* would be an argument to maintain the
> > existing export policy. "Consistency" in and of itself is not a
> > precedent we can use more widely in default export-type decisions.
> >
> > Effectively I'm arguing EXPORT_SYMBOL_GPL by default with a later
> > decision to drop the _GPL. Similar to how we are careful to mark sysfs
> > interfaces in Documentation/ABI/ that we are not fully committed to
> > maintaining over time, or are otherwise so new that there is not yet a
> > good read on whether they can be made permanent.
>
> Documentation/process/stable-api-nonsense.rst

That document has failed to preclude symbol export fights in the past
and there is a reasonable argument to try not to retract functionality
that had been previously exported regardless of that document.

> Really. If you want to play with GPL vs. EXPORT_SYMBOL else this is up
> to you but I do not see any technical argument to make this particular
> interface to the page allocator any different from all others that are
> exported to modules.

I'm failing to find any practical substance to your argument, but in
the end I agree with Chrishoph, it's up to MM maintainers.

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-19 19:27             ` Jason Gunthorpe
  2019-06-19 19:46               ` Dan Williams
@ 2019-06-26  3:15               ` John Hubbard
  2019-06-26  5:45                 ` Michal Hocko
  1 sibling, 1 reply; 103+ messages in thread
From: John Hubbard @ 2019-06-26  3:15 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Ira Weiny, Ralph Campbell, linux-nvdimm, nouveau, linux-kernel,
	dri-devel, linux-mm, Jérôme Glisse, Ben Skeggs,
	linux-pci, Christoph Hellwig

On 6/19/19 12:27 PM, Jason Gunthorpe wrote:
> On Thu, Jun 13, 2019 at 06:23:04PM -0700, John Hubbard wrote:
>> On 6/13/19 5:43 PM, Ira Weiny wrote:
>>> On Thu, Jun 13, 2019 at 07:58:29PM +0000, Jason Gunthorpe wrote:
>>>> On Thu, Jun 13, 2019 at 12:53:02PM -0700, Ralph Campbell wrote:
>>>>>
>> ...
>>> So I think it is ok.  Frankly I was wondering if we should remove the public
>>> type altogether but conceptually it seems ok.  But I don't see any users of it
>>> so...  should we get rid of it in the code rather than turning the config off?
>>>
>>> Ira
>>
>> That seems reasonable. I recall that the hope was for those IBM Power 9
>> systems to use _PUBLIC, as they have hardware-based coherent device (GPU)
>> memory, and so the memory really is visible to the CPU. And the IBM team
>> was thinking of taking advantage of it. But I haven't seen anything on
>> that front for a while.
> 
> Does anyone know who those people are and can we encourage them to
> send some patches? :)
> 

I asked about this, and it seems that the idea was: DEVICE_PUBLIC was there
in order to provide an alternative way to do things (such as migrate memory
to and from a device), in case the combination of existing and near-future
NUMA APIs was insufficient. This probably came as a follow-up to the early
2017-ish conversations about NUMA, in which the linux-mm recommendation was
"try using HMM mechanisms, and if those are inadequate, then maybe we can
look at enhancing NUMA so that it has better handling of advanced (GPU-like)
devices".

In the end, however, _PUBLIC was never used, nor does anyone in the local
(NVIDIA + IBM) kernel vicinity seem to have plans to use it.  So it really
does seem safe to remove, although of course it's good to start with 
BROKEN and see if anyone pops up and complains.

thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-26  3:15               ` John Hubbard
@ 2019-06-26  5:45                 ` Michal Hocko
  2019-06-26  6:07                   ` John Hubbard
  0 siblings, 1 reply; 103+ messages in thread
From: Michal Hocko @ 2019-06-26  5:45 UTC (permalink / raw)
  To: John Hubbard
  Cc: Jason Gunthorpe, Ira Weiny, Ralph Campbell, linux-nvdimm,
	nouveau, linux-kernel, dri-devel, linux-mm,
	Jérôme Glisse, Ben Skeggs, linux-pci,
	Christoph Hellwig

On Tue 25-06-19 20:15:28, John Hubbard wrote:
> On 6/19/19 12:27 PM, Jason Gunthorpe wrote:
> > On Thu, Jun 13, 2019 at 06:23:04PM -0700, John Hubbard wrote:
> >> On 6/13/19 5:43 PM, Ira Weiny wrote:
> >>> On Thu, Jun 13, 2019 at 07:58:29PM +0000, Jason Gunthorpe wrote:
> >>>> On Thu, Jun 13, 2019 at 12:53:02PM -0700, Ralph Campbell wrote:
> >>>>>
> >> ...
> >>> So I think it is ok.  Frankly I was wondering if we should remove the public
> >>> type altogether but conceptually it seems ok.  But I don't see any users of it
> >>> so...  should we get rid of it in the code rather than turning the config off?
> >>>
> >>> Ira
> >>
> >> That seems reasonable. I recall that the hope was for those IBM Power 9
> >> systems to use _PUBLIC, as they have hardware-based coherent device (GPU)
> >> memory, and so the memory really is visible to the CPU. And the IBM team
> >> was thinking of taking advantage of it. But I haven't seen anything on
> >> that front for a while.
> > 
> > Does anyone know who those people are and can we encourage them to
> > send some patches? :)
> > 
> 
> I asked about this, and it seems that the idea was: DEVICE_PUBLIC was there
> in order to provide an alternative way to do things (such as migrate memory
> to and from a device), in case the combination of existing and near-future
> NUMA APIs was insufficient. This probably came as a follow-up to the early
> 2017-ish conversations about NUMA, in which the linux-mm recommendation was
> "try using HMM mechanisms, and if those are inadequate, then maybe we can
> look at enhancing NUMA so that it has better handling of advanced (GPU-like)
> devices".

Yes that was the original idea. It sounds so much better to use a common
framework rather than awkward special cased cpuless NUMA nodes with
a weird semantic. User of the neither of the two has shown up so I guess
that the envisioned HW just didn't materialized. Or has there been a
completely different approach chosen?

> In the end, however, _PUBLIC was never used, nor does anyone in the local
> (NVIDIA + IBM) kernel vicinity seem to have plans to use it.  So it really
> does seem safe to remove, although of course it's good to start with 
> BROKEN and see if anyone pops up and complains.

Well, I do not really see much of a difference. Preserving an unused
code which doesn't have any user in sight just adds a maintenance burden
whether the code depends on BROKEN or not. We can always revert patches
which remove the code once a real user shows up.
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-25 19:52               ` Dan Williams
@ 2019-06-26  5:46                 ` Michal Hocko
  2019-06-26 16:14                   ` Dan Williams
  0 siblings, 1 reply; 103+ messages in thread
From: Michal Hocko @ 2019-06-26  5:46 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List

On Tue 25-06-19 12:52:18, Dan Williams wrote:
[...]
> > Documentation/process/stable-api-nonsense.rst
> 
> That document has failed to preclude symbol export fights in the past
> and there is a reasonable argument to try not to retract functionality
> that had been previously exported regardless of that document.

Can you point me to any specific example where this would be the case
for the core kernel symbols please?
-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken
  2019-06-26  5:45                 ` Michal Hocko
@ 2019-06-26  6:07                   ` John Hubbard
  0 siblings, 0 replies; 103+ messages in thread
From: John Hubbard @ 2019-06-26  6:07 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Jason Gunthorpe, Ira Weiny, Ralph Campbell, linux-nvdimm,
	nouveau, linux-kernel, dri-devel, linux-mm,
	Jérôme Glisse, Ben Skeggs, linux-pci,
	Christoph Hellwig

On 6/25/19 10:45 PM, Michal Hocko wrote:
> On Tue 25-06-19 20:15:28, John Hubbard wrote:
>> On 6/19/19 12:27 PM, Jason Gunthorpe wrote:
>>> On Thu, Jun 13, 2019 at 06:23:04PM -0700, John Hubbard wrote:
>>>> On 6/13/19 5:43 PM, Ira Weiny wrote:
>>>>> On Thu, Jun 13, 2019 at 07:58:29PM +0000, Jason Gunthorpe wrote:
>>>>>> On Thu, Jun 13, 2019 at 12:53:02PM -0700, Ralph Campbell wrote:
>>>>>>>
>>>> ...
>>>>> So I think it is ok.  Frankly I was wondering if we should remove the public
>>>>> type altogether but conceptually it seems ok.  But I don't see any users of it
>>>>> so...  should we get rid of it in the code rather than turning the config off?
>>>>>
>>>>> Ira
>>>>
>>>> That seems reasonable. I recall that the hope was for those IBM Power 9
>>>> systems to use _PUBLIC, as they have hardware-based coherent device (GPU)
>>>> memory, and so the memory really is visible to the CPU. And the IBM team
>>>> was thinking of taking advantage of it. But I haven't seen anything on
>>>> that front for a while.
>>>
>>> Does anyone know who those people are and can we encourage them to
>>> send some patches? :)
>>>
>>
>> I asked about this, and it seems that the idea was: DEVICE_PUBLIC was there
>> in order to provide an alternative way to do things (such as migrate memory
>> to and from a device), in case the combination of existing and near-future
>> NUMA APIs was insufficient. This probably came as a follow-up to the early
>> 2017-ish conversations about NUMA, in which the linux-mm recommendation was
>> "try using HMM mechanisms, and if those are inadequate, then maybe we can
>> look at enhancing NUMA so that it has better handling of advanced (GPU-like)
>> devices".
> 
> Yes that was the original idea. It sounds so much better to use a common
> framework rather than awkward special cased cpuless NUMA nodes with
> a weird semantic. User of the neither of the two has shown up so I guess
> that the envisioned HW just didn't materialized. Or has there been a
> completely different approach chosen?

The HW showed up, alright: it's the IBM Power 9, which provides HW-based
memory coherency between its CPUs and GPUs. So on this system, the CPU is
allowed to access GPU memory, which *could* be modeled as DEVICE_PUBLIC.

However, what happened was that the system worked well enough with a combination
of the device driver, plus NUMA APIs, plus heaven knows what sort of /proc tuning
might have also gone on. :) No one saw the need to reach for the DEVICE_PUBLIC
functionality.

> 
>> In the end, however, _PUBLIC was never used, nor does anyone in the local
>> (NVIDIA + IBM) kernel vicinity seem to have plans to use it.  So it really
>> does seem safe to remove, although of course it's good to start with 
>> BROKEN and see if anyone pops up and complains.
> 
> Well, I do not really see much of a difference. Preserving an unused
> code which doesn't have any user in sight just adds a maintenance burden
> whether the code depends on BROKEN or not. We can always revert patches
> which remove the code once a real user shows up.

Sure, I don't see much difference either. Either way seems fine.

thanks,
-- 
John Hubbard
NVIDIA

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-26  5:46                 ` Michal Hocko
@ 2019-06-26 16:14                   ` Dan Williams
  2019-06-27  6:41                     ` Michal Hocko
  0 siblings, 1 reply; 103+ messages in thread
From: Dan Williams @ 2019-06-26 16:14 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List

On Tue, Jun 25, 2019 at 10:46 PM Michal Hocko <mhocko@kernel.org> wrote:
>
> On Tue 25-06-19 12:52:18, Dan Williams wrote:
> [...]
> > > Documentation/process/stable-api-nonsense.rst
> >
> > That document has failed to preclude symbol export fights in the past
> > and there is a reasonable argument to try not to retract functionality
> > that had been previously exported regardless of that document.
>
> Can you point me to any specific example where this would be the case
> for the core kernel symbols please?

The most recent example that comes to mind was the thrash around
__kernel_fpu_{begin,end} [1]. I referenced that when debating _GPL
symbol policy with Jérôme [2].

[1]: https://lore.kernel.org/lkml/20190522100959.GA15390@kroah.com/
[2]: https://lore.kernel.org/lkml/CAPcyv4gb+r==riKFXkVZ7gGdnKe62yBmZ7xOa4uBBByhnK9Tzg@mail.gmail.com/

^ permalink raw reply	[flat|nested] 103+ messages in thread

* Re: [PATCH 05/22] mm: export alloc_pages_vma
  2019-06-26 16:14                   ` Dan Williams
@ 2019-06-27  6:41                     ` Michal Hocko
  0 siblings, 0 replies; 103+ messages in thread
From: Michal Hocko @ 2019-06-27  6:41 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, Jérôme Glisse, Jason Gunthorpe,
	Ben Skeggs, Linux MM, nouveau, Maling list - DRI developers,
	linux-nvdimm, linux-pci, Linux Kernel Mailing List

On Wed 26-06-19 09:14:32, Dan Williams wrote:
> On Tue, Jun 25, 2019 at 10:46 PM Michal Hocko <mhocko@kernel.org> wrote:
> >
> > On Tue 25-06-19 12:52:18, Dan Williams wrote:
> > [...]
> > > > Documentation/process/stable-api-nonsense.rst
> > >
> > > That document has failed to preclude symbol export fights in the past
> > > and there is a reasonable argument to try not to retract functionality
> > > that had been previously exported regardless of that document.
> >
> > Can you point me to any specific example where this would be the case
> > for the core kernel symbols please?
> 
> The most recent example that comes to mind was the thrash around
> __kernel_fpu_{begin,end} [1].

Well, this seems more like a disagreement over a functionality that has
reduced its visibility rather than enforcement of a specific API. And I
do agree that the above document states that this is perfectly
legitimate and no out-of-tree code can rely on _any_ functionality to be
preserved.

On the other hand, I am not really surprised about the discussion
because d63e79b114c02 is a mere clean up not explaining why the
functionality should be restricted to GPL only code. So there certainly
is a room for clarification. Especially when the code has been exported
without this restriction in the past (see 8546c008924d5). So to me this
sounds more like a usual EXPORT_SYMBOL{_GPL} mess.

In any case I really do not see any relation to the maintenance cost
here. GPL symbols are not in any sense more stable than any other
exported symbol. They can change at any time. The only maintenance
burden is to update all _in_kernel_ users of the said symbol. Any
out-of-tree code is on its own to deal with this. Full stop.

GPL or non-GPL symbols are solely to define a scope of the usage.
Nothing less and nothing more.

> I referenced that when debating _GPL symbol policy with Jérôme [2].
> 
> [1]: https://lore.kernel.org/lkml/20190522100959.GA15390@kroah.com/
> [2]: https://lore.kernel.org/lkml/CAPcyv4gb+r==riKFXkVZ7gGdnKe62yBmZ7xOa4uBBByhnK9Tzg@mail.gmail.com/

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 103+ messages in thread

end of thread, back to index

Thread overview: 103+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-06-13  9:43 dev_pagemap related cleanups Christoph Hellwig
2019-06-13  9:43 ` [PATCH 01/22] mm: remove the unused ARCH_HAS_HMM_DEVICE Kconfig option Christoph Hellwig
2019-06-13 18:30   ` Jason Gunthorpe
2019-06-13  9:43 ` [PATCH 02/22] mm: remove the struct hmm_device infrastructure Christoph Hellwig
2019-06-13 18:46   ` Jason Gunthorpe
2019-06-13 23:06   ` [Nouveau] " John Hubbard
2019-06-13  9:43 ` [PATCH 03/22] mm: remove hmm_devmem_add_resource Christoph Hellwig
2019-06-13 18:52   ` Jason Gunthorpe
2019-06-14  6:19     ` Christoph Hellwig
2019-06-14  0:54   ` [Nouveau] " John Hubbard
2019-06-20 19:32   ` Michal Hocko
2019-06-13  9:43 ` [PATCH 04/22] mm: don't clear ->mapping in hmm_devmem_free Christoph Hellwig
2019-06-13 19:05   ` Jason Gunthorpe
2019-06-14  6:21     ` Christoph Hellwig
2019-06-14  1:46   ` John Hubbard
2019-06-20 19:36   ` Michal Hocko
2019-06-13  9:43 ` [PATCH 05/22] mm: export alloc_pages_vma Christoph Hellwig
2019-06-14  1:47   ` John Hubbard
2019-06-14  6:23     ` Christoph Hellwig
2019-06-20 19:17   ` Michal Hocko
2019-06-24 18:24     ` Dan Williams
2019-06-25  7:23       ` Christoph Hellwig
2019-06-25 15:00         ` Michal Hocko
2019-06-25 18:03           ` Dan Williams
2019-06-25 19:00             ` Michal Hocko
2019-06-25 19:52               ` Dan Williams
2019-06-26  5:46                 ` Michal Hocko
2019-06-26 16:14                   ` Dan Williams
2019-06-27  6:41                     ` Michal Hocko
2019-06-13  9:43 ` [PATCH 06/22] mm: factor out a devm_request_free_mem_region helper Christoph Hellwig
2019-06-13 19:16   ` Jason Gunthorpe
2019-06-14  6:24     ` Christoph Hellwig
2019-06-15  2:21   ` John Hubbard
2019-06-15 14:30     ` Christoph Hellwig
2019-06-13  9:43 ` [PATCH 07/22] memremap: move dev_pagemap callbacks into a separate structure Christoph Hellwig
2019-06-13 19:18   ` Jason Gunthorpe
2019-06-13 20:14   ` Logan Gunthorpe
2019-06-13  9:43 ` [PATCH 08/22] memremap: pass a struct dev_pagemap to ->kill Christoph Hellwig
2019-06-13 19:26   ` Jason Gunthorpe
2019-06-13 20:12   ` Logan Gunthorpe
2019-06-13 20:15     ` Dan Williams
2019-06-13  9:43 ` [PATCH 09/22] memremap: lift the devmap_enable manipulation into devm_memremap_pages Christoph Hellwig
2019-06-13 19:34   ` Jason Gunthorpe
2019-06-13 20:13     ` Dan Williams
2019-06-14  6:28     ` Christoph Hellwig
2019-06-13  9:43 ` [PATCH 10/22] memremap: add a migrate callback to struct dev_pagemap_ops Christoph Hellwig
2019-06-13 23:42   ` Ralph Campbell
2019-06-14  6:33     ` Christoph Hellwig
2019-06-13  9:43 ` [PATCH 11/22] memremap: remove the data field in struct dev_pagemap Christoph Hellwig
2019-06-13 19:37   ` Jason Gunthorpe
2019-06-13  9:43 ` [PATCH 12/22] memremap: provide an optional internal refcount " Christoph Hellwig
2019-06-13  9:43 ` [PATCH 13/22] device-dax: use the dev_pagemap internal refcount Christoph Hellwig
2019-06-14  0:22   ` Ira Weiny
2019-06-14  6:35     ` Christoph Hellwig
2019-06-13  9:43 ` [PATCH 14/22] nouveau: use alloc_page_vma directly Christoph Hellwig
2019-06-13 19:39   ` Jason Gunthorpe
2019-06-13  9:43 ` [PATCH 15/22] nouveau: use devm_memremap_pages directly Christoph Hellwig
2019-06-13  9:43 ` [PATCH 16/22] mm: remove hmm_vma_alloc_locked_page Christoph Hellwig
2019-06-13  9:43 ` [PATCH 17/22] mm: remove hmm_devmem_add Christoph Hellwig
2019-06-13 19:42   ` Jason Gunthorpe
2019-06-14  6:39     ` Christoph Hellwig
2019-06-13  9:43 ` [PATCH 18/22] mm: mark DEVICE_PUBLIC as broken Christoph Hellwig
2019-06-13 19:44   ` Jason Gunthorpe
2019-06-13 19:53     ` Ralph Campbell
2019-06-13 19:58       ` Jason Gunthorpe
2019-06-14  0:43         ` Ira Weiny
2019-06-14  1:23           ` John Hubbard
2019-06-19 19:27             ` Jason Gunthorpe
2019-06-19 19:46               ` Dan Williams
2019-06-26  3:15               ` John Hubbard
2019-06-26  5:45                 ` Michal Hocko
2019-06-26  6:07                   ` John Hubbard
2019-06-14  6:43           ` Christoph Hellwig
2019-06-20 19:26   ` Michal Hocko
2019-06-25  7:29     ` Christoph Hellwig
2019-06-25 11:44       ` Jason Gunthorpe
2019-06-25 11:59         ` Christoph Hellwig
2019-06-13  9:43 ` [PATCH 19/22] mm: simplify ZONE_DEVICE page private data Christoph Hellwig
2019-06-13  9:43 ` [PATCH 20/22] mm: sort out the DEVICE_PRIVATE Kconfig mess Christoph Hellwig
2019-06-13 19:55   ` Jason Gunthorpe
2019-06-13  9:43 ` [PATCH 21/22] mm: remove the HMM config option Christoph Hellwig
2019-06-13 20:01   ` Jason Gunthorpe
2019-06-14  6:47     ` Christoph Hellwig
2019-06-13  9:43 ` [PATCH 22/22] mm: don't select MIGRATE_VMA_HELPER from HMM_MIRROR Christoph Hellwig
2019-06-13 20:04   ` Jason Gunthorpe
2019-06-14  1:53   ` [Nouveau] " John Hubbard
2019-06-14  6:48     ` Christoph Hellwig
2019-06-13 14:16 ` dev_pagemap related cleanups Jason Gunthorpe
2019-06-14  6:12   ` Christoph Hellwig
2019-06-13 18:27 ` Dan Williams
2019-06-13 20:17   ` Logan Gunthorpe
2019-06-13 20:21     ` Dan Williams
2019-06-13 20:24       ` Logan Gunthorpe
2019-06-13 20:48         ` Andrew Morton
2019-06-13 20:40   ` Jason Gunthorpe
2019-06-13 21:21     ` Christoph Hellwig
2019-06-13 23:10       ` Jason Gunthorpe
2019-06-14  6:14         ` Christoph Hellwig
2019-06-14  0:31     ` Ira Weiny
2019-06-14  6:13   ` Christoph Hellwig
2019-06-15  1:14     ` Dan Williams
2019-06-15  8:33       ` Christoph Hellwig
2019-06-15 18:09         ` Dan Williams

Linux-PCI Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-pci/0 linux-pci/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-pci linux-pci/ https://lore.kernel.org/linux-pci \
		linux-pci@vger.kernel.org
	public-inbox-index linux-pci

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-pci


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git