All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: akpm@linux-foundation.org
Cc: linux-nvdimm@lists.01.org, Dave Chinner <david@fromorbit.com>,
	linux-mm@kvack.org, Ross Zwisler <ross.zwisler@linux.intel.com>,
	Logan Gunthorpe <logang@deltatee.com>,
	Christoph Hellwig <hch@lst.de>
Subject: [-mm PATCH v4 04/18] mm: introduce find_dev_pagemap()
Date: Sun, 20 Dec 2015 21:44:28 -0800	[thread overview]
Message-ID: <20151221054428.34542.24500.stgit@dwillia2-desk3.jf.intel.com> (raw)
In-Reply-To: <20151221054406.34542.64393.stgit@dwillia2-desk3.jf.intel.com>

There are several scenarios where we need to retrieve and update
metadata associated with a given devm_memremap_pages() mapping, and the
only lookup key available is a pfn in the range:

1/ We want to augment vmemmap_populate() (called via arch_add_memory())
   to allocate memmap storage from pre-allocated pages reserved by the
   device driver.  At vmemmap_alloc_block_buf() time it grabs device pages
   rather than page allocator pages.  This is in support of
   devm_memremap_pages() mappings where the memmap is too large to fit in
   main memory (i.e. large persistent memory devices).

2/ Taking a reference against the mapping when inserting device pages
   into the address_space radix of a given inode.  This facilitates
   unmap_mapping_range() and truncate_inode_pages() operations when the
   driver is tearing down the mapping.

3/ get_user_pages() operations on ZONE_DEVICE memory require taking a
   reference against the mapping so that the driver teardown path can
   revoke and drain usage of device pages.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Tested-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/pmem.c    |    2 +
 include/linux/io.h       |   15 --------
 include/linux/memremap.h |   38 +++++++++++++++++++++
 kernel/memremap.c        |   85 ++++++++++++++++++++++++++++++++++++++++++----
 4 files changed, 116 insertions(+), 24 deletions(-)
 create mode 100644 include/linux/memremap.h

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 755bf8aefcf2..2afb24ba5a90 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -21,8 +21,8 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
-#include <linux/memory_hotplug.h>
 #include <linux/moduleparam.h>
+#include <linux/memremap.h>
 #include <linux/vmalloc.h>
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
diff --git a/include/linux/io.h b/include/linux/io.h
index de64c1e53612..fffd88d7f426 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -89,21 +89,6 @@ void devm_memunmap(struct device *dev, void *addr);
 
 void *__devm_memremap_pages(struct device *dev, struct resource *res);
 
-#ifdef CONFIG_ZONE_DEVICE
-void *devm_memremap_pages(struct device *dev, struct resource *res);
-#else
-static inline void *devm_memremap_pages(struct device *dev, struct resource *res)
-{
-	/*
-	 * Fail attempts to call devm_memremap_pages() without
-	 * ZONE_DEVICE support enabled, this requires callers to fall
-	 * back to plain devm_memremap() based on config
-	 */
-	WARN_ON_ONCE(1);
-	return ERR_PTR(-ENXIO);
-}
-#endif
-
 /*
  * Some systems do not have legacy ISA devices.
  * /dev/port is not a valid interface on these systems.
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
new file mode 100644
index 000000000000..d90721c178bb
--- /dev/null
+++ b/include/linux/memremap.h
@@ -0,0 +1,38 @@
+#ifndef _LINUX_MEMREMAP_H_
+#define _LINUX_MEMREMAP_H_
+#include <linux/mm.h>
+
+struct resource;
+struct device;
+/**
+ * struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @dev: host device of the mapping for debug
+ */
+struct dev_pagemap {
+	/* TODO: vmem_altmap and percpu_ref count */
+	struct device *dev;
+};
+
+#ifdef CONFIG_ZONE_DEVICE
+void *devm_memremap_pages(struct device *dev, struct resource *res);
+struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
+#else
+static inline void *devm_memremap_pages(struct device *dev,
+		struct resource *res)
+{
+	/*
+	 * Fail attempts to call devm_memremap_pages() without
+	 * ZONE_DEVICE support enabled, this requires callers to fall
+	 * back to plain devm_memremap() based on config
+	 */
+	WARN_ON_ONCE(1);
+	return ERR_PTR(-ENXIO);
+}
+
+static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+{
+	return NULL;
+}
+#endif
+
+#endif /* _LINUX_MEMREMAP_H_ */
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 449cb6a5d9a1..61cfbf4d3054 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -10,6 +10,8 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  */
+#include <linux/radix-tree.h>
+#include <linux/memremap.h>
 #include <linux/device.h>
 #include <linux/types.h>
 #include <linux/pfn_t.h>
@@ -155,22 +157,57 @@ pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
 EXPORT_SYMBOL(phys_to_pfn_t);
 
 #ifdef CONFIG_ZONE_DEVICE
+static DEFINE_MUTEX(pgmap_lock);
+static RADIX_TREE(pgmap_radix, GFP_KERNEL);
+#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
+#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
+
 struct page_map {
 	struct resource res;
+	struct percpu_ref *ref;
+	struct dev_pagemap pgmap;
 };
 
-static void devm_memremap_pages_release(struct device *dev, void *res)
+static void pgmap_radix_release(struct resource *res)
+{
+	resource_size_t key;
+
+	mutex_lock(&pgmap_lock);
+	for (key = res->start; key <= res->end; key += SECTION_SIZE)
+		radix_tree_delete(&pgmap_radix, key >> PA_SECTION_SHIFT);
+	mutex_unlock(&pgmap_lock);
+}
+
+static void devm_memremap_pages_release(struct device *dev, void *data)
 {
-	struct page_map *page_map = res;
+	struct page_map *page_map = data;
+	struct resource *res = &page_map->res;
+	resource_size_t align_start, align_size;
+
+	pgmap_radix_release(res);
 
 	/* pages are dead and unused, undo the arch mapping */
-	arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
+	align_start = res->start & ~(SECTION_SIZE - 1);
+	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	arch_remove_memory(align_start, align_size);
+}
+
+/* assumes rcu_read_lock() held at entry */
+struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+{
+	struct page_map *page_map;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	page_map = radix_tree_lookup(&pgmap_radix, phys >> PA_SECTION_SHIFT);
+	return page_map ? &page_map->pgmap : NULL;
 }
 
 void *devm_memremap_pages(struct device *dev, struct resource *res)
 {
 	int is_ram = region_intersects(res->start, resource_size(res),
 			"System RAM");
+	resource_size_t key, align_start, align_size;
 	struct page_map *page_map;
 	int error, nid;
 
@@ -190,18 +227,50 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
 
 	memcpy(&page_map->res, res, sizeof(*res));
 
+	page_map->pgmap.dev = dev;
+	mutex_lock(&pgmap_lock);
+	error = 0;
+	for (key = res->start; key <= res->end; key += SECTION_SIZE) {
+		struct dev_pagemap *dup;
+
+		rcu_read_lock();
+		dup = find_dev_pagemap(key);
+		rcu_read_unlock();
+		if (dup) {
+			dev_err(dev, "%s: %pr collides with mapping for %s\n",
+					__func__, res, dev_name(dup->dev));
+			error = -EBUSY;
+			break;
+		}
+		error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT,
+				page_map);
+		if (error) {
+			dev_err(dev, "%s: failed: %d\n", __func__, error);
+			break;
+		}
+	}
+	mutex_unlock(&pgmap_lock);
+	if (error)
+		goto err_radix;
+
 	nid = dev_to_node(dev);
 	if (nid < 0)
 		nid = numa_mem_id();
 
-	error = arch_add_memory(nid, res->start, resource_size(res), true);
-	if (error) {
-		devres_free(page_map);
-		return ERR_PTR(error);
-	}
+	align_start = res->start & ~(SECTION_SIZE - 1);
+	align_size = ALIGN(resource_size(res), SECTION_SIZE);
+	error = arch_add_memory(nid, align_start, align_size, true);
+	if (error)
+		goto err_add_memory;
 
 	devres_add(dev, page_map);
 	return __va(res->start);
+
+ err_add_memory:
+ err_radix:
+	pgmap_radix_release(res);
+	devres_free(page_map);
+	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(devm_memremap_pages);
 #endif /* CONFIG_ZONE_DEVICE */

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2015-12-21  5:44 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-12-21  5:44 [-mm PATCH v4 00/18] get_user_pages() for dax pte and pmd mappings Dan Williams
2015-12-21  5:44 ` [-mm PATCH v4 01/18] kvm: rename pfn_t to kvm_pfn_t Dan Williams
2015-12-21  5:44 ` [-mm PATCH v4 02/18] mm, dax, pmem: introduce pfn_t Dan Williams
2015-12-21  5:44 ` [-mm PATCH v4 03/18] mm: skip memory block registration for ZONE_DEVICE Dan Williams
2015-12-21  5:44 ` Dan Williams [this message]
2015-12-21  5:44 ` [-mm PATCH v4 05/18] x86, mm: introduce vmem_altmap to augment vmemmap_populate() Dan Williams
2015-12-27  8:40   ` Bob Liu
2015-12-21  5:44 ` [-mm PATCH v4 06/18] libnvdimm, pfn, pmem: allocate memmap array in persistent memory Dan Williams
2015-12-21  5:44 ` [-mm PATCH v4 07/18] avr32: convert to asm-generic/memory_model.h Dan Williams
2015-12-21  5:44 ` [-mm PATCH v4 08/18] hugetlb: fix compile error on tile Dan Williams
2015-12-21  5:44 ` [-mm PATCH v4 09/18] frv: fix compiler warning from definition of __pmd() Dan Williams
2015-12-21  5:45 ` [-mm PATCH v4 10/18] x86, mm: introduce _PAGE_DEVMAP Dan Williams
2015-12-21  5:45 ` [-mm PATCH v4 11/18] mm, dax, gpu: convert vm_insert_mixed to pfn_t Dan Williams
2015-12-21  5:45 ` [-mm PATCH v4 12/18] mm, dax: convert vmf_insert_pfn_pmd() " Dan Williams
2015-12-21  5:45 ` [-mm PATCH v4 13/18] libnvdimm, pmem: move request_queue allocation earlier in probe Dan Williams
2015-12-21  5:45 ` [-mm PATCH v4 14/18] mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup Dan Williams
2015-12-27  8:46   ` Bob Liu
2015-12-27 19:02     ` Dan Williams
2015-12-21  5:45 ` [-mm PATCH v4 15/18] mm, dax: dax-pmd vs thp-pmd vs hugetlbfs-pmd Dan Williams
2015-12-25  0:59   ` [-mm PATCH v5 " Dan Williams
2015-12-25  1:11     ` Sasha Levin
2015-12-30  5:32   ` [-mm PATCH v4 " Williams, Dan J
2015-12-21  5:45 ` [-mm PATCH v4 16/18] mm, x86: get_user_pages() for dax mappings Dan Williams
2015-12-25  1:03   ` [-mm PATCH v5 " Dan Williams
2015-12-21  5:45 ` [-mm PATCH v4 17/18] dax: provide diagnostics for pmd mapping failures Dan Williams
2015-12-21  5:45 ` [-mm PATCH v4 18/18] dax: re-enable dax pmd mappings Dan Williams
2015-12-27  8:33 ` [-mm PATCH v4 00/18] get_user_pages() for dax pte and " Bob Liu
2015-12-27 18:55   ` Dan Williams
2015-12-29  3:23     ` Bob Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151221054428.34542.24500.stgit@dwillia2-desk3.jf.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@fromorbit.com \
    --cc=hch@lst.de \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=logang@deltatee.com \
    --cc=ross.zwisler@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.