From: Joao Martins <joao.m.martins@oracle.com>
To: linux-mm@kvack.org
Cc: linux-nvdimm@lists.01.org, Matthew Wilcox <willy@infradead.org>,
	Jason Gunthorpe <jgg@ziepe.ca>,
	Muchun Song <songmuchun@bytedance.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Joao Martins <joao.m.martins@oracle.com>
Subject: [PATCH v1 07/11] mm/sparse-vmemmap: populate compound pagemaps
Date: Thu, 25 Mar 2021 23:09:34 +0000
Message-ID: <20210325230938.30752-8-joao.m.martins@oracle.com>
In-Reply-To: <20210325230938.30752-1-joao.m.martins@oracle.com>

A compound pagemap is a dev_pagemap with @align > PAGE_SIZE, meaning
that pages are mapped at a given huge page alignment and use compound
pages, as opposed to order-0 pages.
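
For illustration only, a minimal sketch of how a driver could request
such a compound pagemap with the helpers from this series (the @align
field is per this series; the MEMORY_DEVICE_GENERIC type and the
dev_dax embedding are just example choices, not requirements):

	struct dev_pagemap *pgmap = &dev_dax->pgmap;

	pgmap->type = MEMORY_DEVICE_GENERIC;
	/* map device pages at 2M granularity, as compound pages */
	pgmap->align = PMD_SIZE;
	addr = devm_memremap_pages(dev, pgmap);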

To minimize struct page overhead we take advantage of the fact that
most tail pages look the same (except the first two). We allocate one
vmemmap page for the head page (which also covers the first 63 tail
pages) and another for the next 64 tail pages. The rest of the
subsections then reuse this tail vmemmap page to initialize the
remaining tail pages.
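
Assuming a 64-byte struct page, a 2M compound page needs 512 struct
pages, i.e. 8 vmemmap pages; with this scheme the page table entries
for its vmemmap end up laid out roughly as:

	vmemmap PTE 0     -> page A (head page + tail pages 1..63)
	vmemmap PTE 1     -> page B (tail pages 64..127)
	vmemmap PTEs 2..7 -> page B reused (remaining tail pages)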

Section size is arch-dependent (e.g. on x86 it is 64M, 128M or 512M),
so when initializing a compound pagemap with a big enough @align (e.g.
a 1G PUD) the mapping will cross multiple sections. To be able to
reuse tail pages across sections belonging to the same gigantic page,
we fetch the @range currently being mapped (nr_ranges + 1). If the
section being mapped does not start at offset 0 of the @align, we look
up the PFN of the struct page address that precedes it and use that
page to populate the entire section.
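
As a concrete example, with 128M sections and a 1G @align the gigantic
page spans 8 sections, and only the section at offset 0 allocates
fresh vmemmap pages:

	section 0 (offset 0 of @align): populate head + tail vmemmap pages
	sections 1..7 (offset != 0):    vmemmap_lookup_address(start - PAGE_SIZE)
	                                and reuse that page for the whole section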

On compound pagemaps with a 2M align, this mechanism saves 6 out of
the 8 vmemmap pages needed to describe the subsection's 512 struct
pages being mapped. On a 1G compound pagemap it saves 4094 pages.
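
For reference, the arithmetic behind those numbers, again assuming a
64-byte struct page:

	2M align: 512 struct pages * 64B    = 8 vmemmap pages;
	          only head + tail pages allocated -> 8 - 2    = 6 saved
	1G align: 262144 struct pages * 64B = 4096 vmemmap pages;
	          only head + tail pages allocated -> 4096 - 2 = 4094 saved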

Altmap isn't supported yet, given various restrictions in the altmap
pfn allocator; thus we fall back to the already in-use
vmemmap_populate().

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 include/linux/mm.h  |   2 +-
 mm/memremap.c       |   1 +
 mm/sparse-vmemmap.c | 139 ++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 131 insertions(+), 11 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 61474602c2b1..49d717ae40ae 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3040,7 +3040,7 @@ p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
 pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
-			    struct vmem_altmap *altmap);
+			    struct vmem_altmap *altmap, void *block);
 void *vmemmap_alloc_block(unsigned long size, int node);
 struct vmem_altmap;
 void *vmemmap_alloc_block_buf(unsigned long size, int node,
diff --git a/mm/memremap.c b/mm/memremap.c
index d160853670c4..2e6bc0b1ff00 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -345,6 +345,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 {
 	struct mhp_params params = {
 		.altmap = pgmap_altmap(pgmap),
+		.pgmap = pgmap,
 		.pgprot = PAGE_KERNEL,
 	};
 	const int nr_range = pgmap->nr_range;
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 8814876edcfa..f57c5eada099 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -141,16 +141,20 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
 }
 
 pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
-				       struct vmem_altmap *altmap)
+				       struct vmem_altmap *altmap, void *block)
 {
 	pte_t *pte = pte_offset_kernel(pmd, addr);
 	if (pte_none(*pte)) {
 		pte_t entry;
-		void *p;
-
-		p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
-		if (!p)
-			return NULL;
+		void *p = block;
+
+		if (!block) {
+			p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
+			if (!p)
+				return NULL;
+		} else if (!altmap) {
+			get_page(virt_to_page(block));
+		}
 		entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
 		set_pte_at(&init_mm, addr, pte, entry);
 	}
@@ -217,7 +221,8 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
 }
 
 static int __meminit vmemmap_populate_address(unsigned long addr, int node,
-					      struct vmem_altmap *altmap)
+					      struct vmem_altmap *altmap,
+					      void *page, void **ptr)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
@@ -237,10 +242,14 @@ static int __meminit vmemmap_populate_address(unsigned long addr, int node,
 	pmd = vmemmap_pmd_populate(pud, addr, node);
 	if (!pmd)
 		return -ENOMEM;
-	pte = vmemmap_pte_populate(pmd, addr, node, altmap);
+	pte = vmemmap_pte_populate(pmd, addr, node, altmap, page);
 	if (!pte)
 		return -ENOMEM;
 	vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+	if (ptr)
+		*ptr = __va(__pfn_to_phys(pte_pfn(*pte)));
+	return 0;
 }
 
 int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
@@ -249,7 +258,110 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
 	unsigned long addr = start;
 
 	for (; addr < end; addr += PAGE_SIZE) {
-		if (vmemmap_populate_address(addr, node, altmap))
+		if (vmemmap_populate_address(addr, node, altmap, NULL, NULL))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int __meminit vmemmap_populate_range(unsigned long start,
+					    unsigned long end,
+					    int node, void *page)
+{
+	unsigned long addr = start;
+
+	for (; addr < end; addr += PAGE_SIZE) {
+		if (vmemmap_populate_address(addr, node, NULL, page, NULL))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static inline int __meminit vmemmap_populate_page(unsigned long addr, int node,
+						  void **ptr)
+{
+	return vmemmap_populate_address(addr, node, NULL, NULL, ptr);
+}
+
+static pte_t * __meminit vmemmap_lookup_address(unsigned long addr)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return NULL;
+
+	p4d = p4d_offset(pgd, addr);
+	if (p4d_none(*p4d))
+		return NULL;
+
+	pud = pud_offset(p4d, addr);
+	if (pud_none(*pud))
+		return NULL;
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return NULL;
+
+	pte = pte_offset_kernel(pmd, addr);
+	if (pte_none(*pte))
+		return NULL;
+
+	return pte;
+}
+
+static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+						     unsigned long start,
+						     unsigned long end, int node,
+						     struct dev_pagemap *pgmap)
+{
+	unsigned long offset, size, addr;
+
+	/*
+	 * For compound pages bigger than section size (e.g. 1G) fill the rest
+	 * of sections as tail pages.
+	 *
+	 * Note that memremap_pages() resets @nr_range value and will increment
+	 * it after each successful range onlining. Thus the value of @nr_range
+	 * at section memmap populate corresponds to the in-progress range
+	 * being onlined that we care about.
+	 */
+	offset = PFN_PHYS(start_pfn) - pgmap->ranges[pgmap->nr_range].start;
+	if (!IS_ALIGNED(offset, pgmap_align(pgmap)) &&
+	    pgmap_align(pgmap) > SUBSECTION_SIZE) {
+		pte_t *ptep = vmemmap_lookup_address(start - PAGE_SIZE);
+
+		if (!ptep)
+			return -ENOMEM;
+
+		return vmemmap_populate_range(start, end, node,
+					      page_to_virt(pte_page(*ptep)));
+	}
+
+	size = min(end - start, pgmap_pfn_align(pgmap) * sizeof(struct page));
+	for (addr = start; addr < end; addr += size) {
+		unsigned long next = addr, last = addr + size;
+		void *block;
+
+		/* Populate the head page vmemmap page */
+		if (vmemmap_populate_page(addr, node, NULL))
+			return -ENOMEM;
+
+		/* Populate the tail pages vmemmap page */
+		block = NULL;
+		next = addr + PAGE_SIZE;
+		if (vmemmap_populate_page(next, node, &block))
+			return -ENOMEM;
+
+		/* Reuse the previous page for the rest of tail pages */
+		next += PAGE_SIZE;
+		if (vmemmap_populate_range(next, last, node, block))
 			return -ENOMEM;
 	}
 
@@ -262,12 +374,19 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
 {
 	unsigned long start = (unsigned long) pfn_to_page(pfn);
 	unsigned long end = start + nr_pages * sizeof(struct page);
+	unsigned int align = pgmap_align(pgmap);
+	int r;
 
 	if (WARN_ON_ONCE(!IS_ALIGNED(pfn, PAGES_PER_SUBSECTION) ||
 		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
 		return NULL;
 
-	if (vmemmap_populate(start, end, nid, altmap))
+	if (align > PAGE_SIZE && !altmap)
+		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
+	else
+		r = vmemmap_populate(start, end, nid, altmap);
+
+	if (r < 0)
 		return NULL;
 
 	return pfn_to_page(pfn);
-- 
2.17.1
