linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] vmalloc: introduce vmap_pfn for persistent memory
@ 2017-11-07 22:03 Mikulas Patocka
  2017-11-08  9:59 ` Christoph Hellwig
  0 siblings, 1 reply; 23+ messages in thread
From: Mikulas Patocka @ 2017-11-07 22:03 UTC (permalink / raw)
  To: Ross Zwisler, linux-mm, linux-nvdimm, Dan Williams
  Cc: dm-devel, Laura Abbott, Christoph Hellwig, Kirill A . Shutemov

Hi

I am developing a driver that uses persistent memory for caching. A 
persistent memory device can be mapped in several discontiguous ranges.

The kernel has a function vmap that takes an array of pointers to pages 
and maps these pages to contiguous linear address space. However, it can't 
be used on persistent memory because persistent memory may not be backed 
by page structures.

This patch introduces a new function vmap_pfn, it works like vmap, but 
takes an array of pfn_t - so it can be used on persistent memory.

This is an example how vmap_pfn is used: 
https://www.redhat.com/archives/dm-devel/2017-November/msg00026.html (see 
the function persistent_memory_claim)

Mikulas



From: Mikulas Patocka <mpatocka@redhat.com>

There's a function vmap that can take discontiguous pages and map them 
linearly to the vmalloc space. However, persistent memory may not be 
backed by pages, so we can't use vmap on it.

This patch introduces a function vmap_pfn that works like vmap, but it 
takes an array of page frame numbers (pfn_t). It can be used to remap 
discontiguous chunks of persistent memory into a linear range.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 include/linux/vmalloc.h |    2 +
 mm/vmalloc.c            |   88 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 71 insertions(+), 19 deletions(-)

Index: linux-2.6/include/linux/vmalloc.h
===================================================================
--- linux-2.6.orig/include/linux/vmalloc.h
+++ linux-2.6/include/linux/vmalloc.h
@@ -98,6 +98,8 @@ extern void vfree_atomic(const void *add
 
 extern void *vmap(struct page **pages, unsigned int count,
 			unsigned long flags, pgprot_t prot);
+extern void *vmap_pfn(pfn_t *pfns, unsigned int count,
+			unsigned long flags, pgprot_t prot);
 extern void vunmap(const void *addr);
 
 extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
Index: linux-2.6/mm/vmalloc.c
===================================================================
--- linux-2.6.orig/mm/vmalloc.c
+++ linux-2.6/mm/vmalloc.c
@@ -31,6 +31,7 @@
 #include <linux/compiler.h>
 #include <linux/llist.h>
 #include <linux/bitops.h>
+#include <linux/pfn_t.h>
 
 #include <linux/uaccess.h>
 #include <asm/tlbflush.h>
@@ -132,7 +133,7 @@ static void vunmap_page_range(unsigned l
 }
 
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, pfn_t *pfns, int *nr)
 {
 	pte_t *pte;
 
@@ -145,20 +146,25 @@ static int vmap_pte_range(pmd_t *pmd, un
 	if (!pte)
 		return -ENOMEM;
 	do {
-		struct page *page = pages[*nr];
-
+		unsigned long pf;
+		if (pages) {
+			struct page *page = pages[*nr];
+			if (WARN_ON(!page))
+				return -ENOMEM;
+			pf = page_to_pfn(page);
+		} else {
+			pf = pfn_t_to_pfn(pfns[*nr]);
+		}
 		if (WARN_ON(!pte_none(*pte)))
 			return -EBUSY;
-		if (WARN_ON(!page))
-			return -ENOMEM;
-		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+		set_pte_at(&init_mm, addr, pte, pfn_pte(pf, prot));
 		(*nr)++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	return 0;
 }
 
 static int vmap_pmd_range(pud_t *pud, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, pfn_t *pfns, int *nr)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -168,14 +174,14 @@ static int vmap_pmd_range(pud_t *pud, un
 		return -ENOMEM;
 	do {
 		next = pmd_addr_end(addr, end);
-		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
+		if (vmap_pte_range(pmd, addr, next, prot, pages, pfns, nr))
 			return -ENOMEM;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
 static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, pfn_t *pfns, int *nr)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -185,14 +191,14 @@ static int vmap_pud_range(p4d_t *p4d, un
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
+		if (vmap_pmd_range(pud, addr, next, prot, pages, pfns, nr))
 			return -ENOMEM;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
 static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, pfn_t *pfns, int *nr)
 {
 	p4d_t *p4d;
 	unsigned long next;
@@ -202,7 +208,7 @@ static int vmap_p4d_range(pgd_t *pgd, un
 		return -ENOMEM;
 	do {
 		next = p4d_addr_end(addr, end);
-		if (vmap_pud_range(p4d, addr, next, prot, pages, nr))
+		if (vmap_pud_range(p4d, addr, next, prot, pages, pfns, nr))
 			return -ENOMEM;
 	} while (p4d++, addr = next, addr != end);
 	return 0;
@@ -215,7 +221,7 @@ static int vmap_p4d_range(pgd_t *pgd, un
  * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
  */
 static int vmap_page_range_noflush(unsigned long start, unsigned long end,
-				   pgprot_t prot, struct page **pages)
+				   pgprot_t prot, struct page **pages, pfn_t *pfns)
 {
 	pgd_t *pgd;
 	unsigned long next;
@@ -227,7 +233,7 @@ static int vmap_page_range_noflush(unsig
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
+		err = vmap_p4d_range(pgd, addr, next, prot, pages, pfns, &nr);
 		if (err)
 			return err;
 	} while (pgd++, addr = next, addr != end);
@@ -236,11 +242,11 @@ static int vmap_page_range_noflush(unsig
 }
 
 static int vmap_page_range(unsigned long start, unsigned long end,
-			   pgprot_t prot, struct page **pages)
+			   pgprot_t prot, struct page **pages, pfn_t *pfns)
 {
 	int ret;
 
-	ret = vmap_page_range_noflush(start, end, prot, pages);
+	ret = vmap_page_range_noflush(start, end, prot, pages, pfns);
 	flush_cache_vmap(start, end);
 	return ret;
 }
@@ -1191,7 +1197,7 @@ void *vm_map_ram(struct page **pages, un
 		addr = va->va_start;
 		mem = (void *)addr;
 	}
-	if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
+	if (vmap_page_range(addr, addr + size, prot, pages, NULL) < 0) {
 		vm_unmap_ram(mem, count);
 		return NULL;
 	}
@@ -1306,7 +1312,7 @@ void __init vmalloc_init(void)
 int map_kernel_range_noflush(unsigned long addr, unsigned long size,
 			     pgprot_t prot, struct page **pages)
 {
-	return vmap_page_range_noflush(addr, addr + size, prot, pages);
+	return vmap_page_range_noflush(addr, addr + size, prot, pages, NULL);
 }
 
 /**
@@ -1347,13 +1353,24 @@ void unmap_kernel_range(unsigned long ad
 }
 EXPORT_SYMBOL_GPL(unmap_kernel_range);
 
+static int map_vm_area_pfn(struct vm_struct *area, pgprot_t prot, pfn_t *pfns)
+{
+	unsigned long addr = (unsigned long)area->addr;
+	unsigned long end = addr + get_vm_area_size(area);
+	int err;
+
+	err = vmap_page_range(addr, end, prot, NULL, pfns);
+
+	return err > 0 ? 0 : err;
+}
+
 int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
 {
 	unsigned long addr = (unsigned long)area->addr;
 	unsigned long end = addr + get_vm_area_size(area);
 	int err;
 
-	err = vmap_page_range(addr, end, prot, pages);
+	err = vmap_page_range(addr, end, prot, pages, NULL);
 
 	return err > 0 ? 0 : err;
 }
@@ -1660,6 +1677,39 @@ void *vmap(struct page **pages, unsigned
 }
 EXPORT_SYMBOL(vmap);
 
+/**
+ *	vmap_pfn  -  map an array of pages into virtually contiguous space
+ *	@pfns:		array of page frame numbers
+ *	@count:		number of pages to map
+ *	@flags:		vm_area->flags
+ *	@prot:		page protection for the mapping
+ *
+ *	Maps @count pages from @pages into contiguous kernel virtual
+ *	space.
+ */
+void *vmap_pfn(pfn_t *pfns, unsigned int count, unsigned long flags, pgprot_t prot)
+{
+	struct vm_struct *area;
+	unsigned long size;		/* In bytes */
+
+	might_sleep();
+
+	size = (unsigned long)count << PAGE_SHIFT;
+	if (unlikely((size >> PAGE_SHIFT) != count))
+		return NULL;
+	area = get_vm_area_caller(size, flags, __builtin_return_address(0));
+	if (!area)
+		return NULL;
+
+	if (map_vm_area_pfn(area, prot, pfns)) {
+		vunmap(area->addr);
+		return NULL;
+	}
+
+	return area->addr;
+}
+EXPORT_SYMBOL(vmap_pfn);
+
 static void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, pgprot_t prot,
 			    int node, const void *caller);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2017-11-09 18:58 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-07 22:03 [PATCH] vmalloc: introduce vmap_pfn for persistent memory Mikulas Patocka
2017-11-08  9:59 ` Christoph Hellwig
2017-11-08 12:33   ` Mikulas Patocka
2017-11-08 15:04     ` Christoph Hellwig
2017-11-08 15:21       ` Mikulas Patocka
2017-11-08 15:35         ` [dm-devel] " Christoph Hellwig
2017-11-08 15:41           ` Dan Williams
2017-11-08 20:15             ` Mikulas Patocka
2017-11-08 20:25               ` Dan Williams
2017-11-09 16:40                 ` Mikulas Patocka
2017-11-09 16:45                   ` Dan Williams
2017-11-09 17:30                     ` Mikulas Patocka
2017-11-09 17:35                       ` Dan Williams
2017-11-08 17:42           ` Mikulas Patocka
2017-11-08 17:47             ` Christoph Hellwig
2017-11-08 20:26               ` Mikulas Patocka
2017-11-08 21:26                 ` Dan Williams
2017-11-09 16:37                   ` Mikulas Patocka
2017-11-09 16:49                     ` Dan Williams
2017-11-09 18:13                       ` Mikulas Patocka
2017-11-09 18:38                         ` Dan Williams
2017-11-09 18:51                           ` Mikulas Patocka
2017-11-09 18:58                             ` Dan Williams

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).