loongarch.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] MIPS&LoongArch: Adjust prototypes of p?d_init()
@ 2022-06-25  9:54 Huacai Chen
  2022-06-25  9:54 ` [PATCH 2/3] LoongArch: Add sparse memory vmemmap support Huacai Chen
  2022-06-25  9:54 ` [PATCH 3/3] mm/sparse-vmemmap: Generalise vmemmap_populate_hugepages() Huacai Chen
  0 siblings, 2 replies; 5+ messages in thread
From: Huacai Chen @ 2022-06-25  9:54 UTC (permalink / raw)
  To: Arnd Bergmann, Huacai Chen, Thomas Bogendoerfer, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, Catalin Marinas, Will Deacon
  Cc: loongarch, linux-arch, Xuefeng Li, Guo Ren, Xuerui Wang,
	Jiaxun Yang, Andrew Morton, linux-mm, linux-mips, linux-kernel,
	linux-arm-kernel, Feiyang Chen, Huacai Chen

From: Feiyang Chen <chenfeiyang@loongson.cn>

We are preparing to add sparse vmemmap support to LoongArch. MIPS and
LoongArch need to call pgd_init()/pud_init()/pmd_init() when populating
page tables, so adjust their prototypes so that generic helpers can call
them.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
---
 arch/loongarch/include/asm/pgalloc.h | 13 ++-----------
 arch/loongarch/include/asm/pgtable.h |  8 ++++----
 arch/loongarch/kernel/numa.c         |  4 ++--
 arch/loongarch/mm/pgtable.c          | 23 +++++++++++++----------
 arch/mips/include/asm/pgalloc.h      |  8 ++++----
 arch/mips/include/asm/pgtable-64.h   |  8 ++++----
 arch/mips/kvm/mmu.c                  |  3 +--
 arch/mips/mm/pgtable-32.c            | 10 +++++-----
 arch/mips/mm/pgtable-64.c            | 18 ++++++++++--------
 arch/mips/mm/pgtable.c               |  2 +-
 10 files changed, 46 insertions(+), 51 deletions(-)

diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index b0a57b25c131..6a492d7aeeec 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -42,15 +42,6 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 
 extern void pagetable_init(void);
 
-/*
- * Initialize a new pmd table with invalid pointers.
- */
-extern void pmd_init(unsigned long page, unsigned long pagetable);
-
-/*
- * Initialize a new pgd / pmd table with invalid pointers.
- */
-extern void pgd_init(unsigned long page);
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
 #define __pte_free_tlb(tlb, pte, address)			\
@@ -76,7 +67,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 	}
 
 	pmd = (pmd_t *)page_address(pg);
-	pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
+	pmd_init(pmd);
 	return pmd;
 }
 
@@ -92,7 +83,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
 	if (pud)
-		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+		pud_init(pud);
 	return pud;
 }
 
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index d9e86cfa53e2..9c811c3f7572 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -243,11 +243,11 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pm
 #define pfn_pmd(pfn, prot)	__pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
 
 /*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd / pud / pmd table with invalid pointers.
  */
-extern void pgd_init(unsigned long page);
-extern void pud_init(unsigned long page, unsigned long pagetable);
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pgd_init(void *addr);
+extern void pud_init(void *addr);
+extern void pmd_init(void *addr);
 
 /*
  * Non-present pages:  high 40 bits are offset, next 8 bits type,
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index a76f547a5aa3..839ce601f220 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -78,7 +78,7 @@ void __init pcpu_populate_pte(unsigned long addr)
 		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 		pgd_populate(&init_mm, pgd, new);
 #ifndef __PAGETABLE_PUD_FOLDED
-		pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+		pud_init(new);
 #endif
 	}
 
@@ -89,7 +89,7 @@ void __init pcpu_populate_pte(unsigned long addr)
 		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 		pud_populate(&init_mm, pud, new);
 #ifndef __PAGETABLE_PMD_FOLDED
-		pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+		pmd_init(new);
 #endif
 	}
 
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index 0569647152e9..e79cc41acac5 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -16,7 +16,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER);
 	if (ret) {
 		init = pgd_offset(&init_mm, 0UL);
-		pgd_init((unsigned long)ret);
+		pgd_init(ret);
 		memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
 		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 	}
@@ -25,7 +25,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(pgd_alloc);
 
-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
 {
 	unsigned long *p, *end;
 	unsigned long entry;
@@ -38,7 +38,7 @@ void pgd_init(unsigned long page)
 	entry = (unsigned long)invalid_pte_table;
 #endif
 
-	p = (unsigned long *) page;
+	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PGD;
 
 	do {
@@ -56,11 +56,12 @@ void pgd_init(unsigned long page)
 EXPORT_SYMBOL_GPL(pgd_init);
 
 #ifndef __PAGETABLE_PMD_FOLDED
-void pmd_init(unsigned long addr, unsigned long pagetable)
+void pmd_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pte_table;
 
-	p = (unsigned long *) addr;
+	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PMD;
 
 	do {
@@ -79,9 +80,10 @@ EXPORT_SYMBOL_GPL(pmd_init);
 #endif
 
 #ifndef __PAGETABLE_PUD_FOLDED
-void pud_init(unsigned long addr, unsigned long pagetable)
+void pud_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pmd_table;
 
 	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PUD;
@@ -98,6 +100,7 @@ void pud_init(unsigned long addr, unsigned long pagetable)
 		p[-1] = pagetable;
 	} while (p != end);
 }
+EXPORT_SYMBOL_GPL(pud_init);
 #endif
 
 pmd_t mk_pmd(struct page *page, pgprot_t prot)
@@ -119,12 +122,12 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 void __init pagetable_init(void)
 {
 	/* Initialize the entire pgd.  */
-	pgd_init((unsigned long)swapper_pg_dir);
-	pgd_init((unsigned long)invalid_pg_dir);
+	pgd_init(swapper_pg_dir);
+	pgd_init(invalid_pg_dir);
 #ifndef __PAGETABLE_PUD_FOLDED
-	pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+	pud_init(invalid_pud_table);
 #endif
 #ifndef __PAGETABLE_PMD_FOLDED
-	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
+	pmd_init(invalid_pmd_table);
 #endif
 }
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 867e9c3db76e..9f7d117c5ebf 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -33,7 +33,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Initialize a new pmd table with invalid pointers.
  */
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pmd_init(void *addr);
 
 #ifndef __PAGETABLE_PMD_FOLDED
 
@@ -44,9 +44,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 #endif
 
 /*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd table with invalid pointers.
  */
-extern void pgd_init(unsigned long page);
+extern void pgd_init(void *addr);
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -77,7 +77,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 	}
 
 	pmd = (pmd_t *)page_address(pg);
-	pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
+	pmd_init(pmd);
 	return pmd;
 }
 
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 41921acdc9d8..8ac76bbb4b38 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -323,11 +323,11 @@ static inline pmd_t *pud_pgtable(pud_t pud)
 #endif
 
 /*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd / pud / pmd table with invalid pointers.
  */
-extern void pgd_init(unsigned long page);
-extern void pud_init(unsigned long page, unsigned long pagetable);
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pgd_init(void *addr);
+extern void pud_init(void *addr);
+extern void pmd_init(void *addr);
 
 /*
  * Non-present pages:  high 40 bits are offset, next 8 bits type,
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 1bfd1b501d82..cb10a92ad3ae 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -122,8 +122,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
 		if (!cache)
 			return NULL;
 		new_pmd = kvm_mmu_memory_cache_alloc(cache);
-		pmd_init((unsigned long)new_pmd,
-			 (unsigned long)invalid_pte_table);
+		pmd_init(new_pmd);
 		pud_populate(NULL, pud, new_pmd);
 	}
 	pmd = pmd_offset(pud, addr);
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index 61891af25019..88819a21d97e 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -13,9 +13,9 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
 {
-	unsigned long *p = (unsigned long *) page;
+	unsigned long *p = (unsigned long *)addr;
 	int i;
 
 	for (i = 0; i < USER_PTRS_PER_PGD; i+=8) {
@@ -61,9 +61,9 @@ void __init pagetable_init(void)
 #endif
 
 	/* Initialize the entire pgd.  */
-	pgd_init((unsigned long)swapper_pg_dir);
-	pgd_init((unsigned long)swapper_pg_dir
-		 + sizeof(pgd_t) * USER_PTRS_PER_PGD);
+	pgd_init(swapper_pg_dir);
+	pgd_init((void *)((unsigned long)swapper_pg_dir
+		 + sizeof(pgd_t) * USER_PTRS_PER_PGD));
 
 	pgd_base = swapper_pg_dir;
 
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index 7536f7804c44..b4386a0e2ef8 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -13,7 +13,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
 {
 	unsigned long *p, *end;
 	unsigned long entry;
@@ -26,7 +26,7 @@ void pgd_init(unsigned long page)
 	entry = (unsigned long)invalid_pte_table;
 #endif
 
-	p = (unsigned long *) page;
+	p = (unsigned long *) addr;
 	end = p + PTRS_PER_PGD;
 
 	do {
@@ -43,11 +43,12 @@ void pgd_init(unsigned long page)
 }
 
 #ifndef __PAGETABLE_PMD_FOLDED
-void pmd_init(unsigned long addr, unsigned long pagetable)
+void pmd_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pte_table;
 
-	p = (unsigned long *) addr;
+	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PMD;
 
 	do {
@@ -66,9 +67,10 @@ EXPORT_SYMBOL_GPL(pmd_init);
 #endif
 
 #ifndef __PAGETABLE_PUD_FOLDED
-void pud_init(unsigned long addr, unsigned long pagetable)
+void pud_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pmd_table;
 
 	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PUD;
@@ -108,12 +110,12 @@ void __init pagetable_init(void)
 	pgd_t *pgd_base;
 
 	/* Initialize the entire pgd.  */
-	pgd_init((unsigned long)swapper_pg_dir);
+	pgd_init(swapper_pg_dir);
 #ifndef __PAGETABLE_PUD_FOLDED
-	pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+	pud_init(invalid_pud_table);
 #endif
 #ifndef __PAGETABLE_PMD_FOLDED
-	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
+	pmd_init(invalid_pmd_table);
 #endif
 	pgd_base = swapper_pg_dir;
 	/*
diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c
index 05560b042d82..98e5e880927a 100644
--- a/arch/mips/mm/pgtable.c
+++ b/arch/mips/mm/pgtable.c
@@ -15,7 +15,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER);
 	if (ret) {
 		init = pgd_offset(&init_mm, 0UL);
-		pgd_init((unsigned long)ret);
+		pgd_init(ret);
 		memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
 		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 	}
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] LoongArch: Add sparse memory vmemmap support
  2022-06-25  9:54 [PATCH 1/3] MIPS&LoongArch: Adjust prototypes of p?d_init() Huacai Chen
@ 2022-06-25  9:54 ` Huacai Chen
  2022-06-27 10:33   ` Muchun Song
  2022-06-25  9:54 ` [PATCH 3/3] mm/sparse-vmemmap: Generalise vmemmap_populate_hugepages() Huacai Chen
  1 sibling, 1 reply; 5+ messages in thread
From: Huacai Chen @ 2022-06-25  9:54 UTC (permalink / raw)
  To: Arnd Bergmann, Huacai Chen, Thomas Bogendoerfer, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, Catalin Marinas, Will Deacon
  Cc: loongarch, linux-arch, Xuefeng Li, Guo Ren, Xuerui Wang,
	Jiaxun Yang, Andrew Morton, linux-mm, linux-mips, linux-kernel,
	linux-arm-kernel, Feiyang Chen, Min Zhou, Huacai Chen

From: Feiyang Chen <chenfeiyang@loongson.cn>

Add sparse memory vmemmap support for LoongArch. SPARSEMEM_VMEMMAP
uses a virtually mapped memmap to optimise pfn_to_page and page_to_pfn
operations. This is the most efficient option when sufficient kernel
resources are available.

Signed-off-by: Min Zhou <zhoumin@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
---
 arch/loongarch/Kconfig                 |  2 +
 arch/loongarch/include/asm/pgtable.h   |  5 +-
 arch/loongarch/include/asm/sparsemem.h |  8 +++
 arch/loongarch/mm/init.c               | 71 +++++++++++++++++++++++++-
 include/linux/mm.h                     |  2 +
 mm/sparse-vmemmap.c                    | 10 ++++
 6 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index dc19cf3071ea..8e56ca28165e 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -49,6 +49,7 @@ config LOONGARCH
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+	select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 	select ARCH_WANTS_NO_INSTR
 	select BUILDTIME_TABLE_SORT
 	select COMMON_CLK
@@ -422,6 +423,7 @@ config ARCH_FLATMEM_ENABLE
 
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
+	select SPARSEMEM_VMEMMAP_ENABLE
 	help
 	  Say Y to support efficient handling of sparse physical memory,
 	  for architectures which are either NUMA (Non-Uniform Memory Access)
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 9c811c3f7572..b701ec7a0309 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -92,7 +92,10 @@ extern unsigned long zero_page_mask;
 #define VMALLOC_START	MODULES_END
 #define VMALLOC_END	\
 	(vm_map_base +	\
-	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE)
+	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+
+#define vmemmap		((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
+#define VMEMMAP_END	((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
 
 #define pte_ERROR(e) \
 	pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h
index 3d18cdf1b069..a1e440f6bec7 100644
--- a/arch/loongarch/include/asm/sparsemem.h
+++ b/arch/loongarch/include/asm/sparsemem.h
@@ -11,6 +11,14 @@
 #define SECTION_SIZE_BITS	29 /* 2^29 = Largest Huge Page Size */
 #define MAX_PHYSMEM_BITS	48
 
+#ifndef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_SIZE	0
+#else
+#define VMEMMAP_SIZE	(sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT)))
+#endif
+
+#include <linux/mm_types.h>
+
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 7094a68c9b83..35128229fe46 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -22,7 +22,7 @@
 #include <linux/pfn.h>
 #include <linux/hardirq.h>
 #include <linux/gfp.h>
-#include <linux/initrd.h>
+#include <linux/hugetlb.h>
 #include <linux/mmzone.h>
 
 #include <asm/asm-offsets.h>
@@ -157,6 +157,75 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 #endif
 #endif
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+					 int node, struct vmem_altmap *altmap)
+{
+	unsigned long addr = start;
+	unsigned long next;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	for (addr = start; addr < end; addr = next) {
+		next = pmd_addr_end(addr, end);
+
+		pgd = vmemmap_pgd_populate(addr, node);
+		if (!pgd)
+			return -ENOMEM;
+		p4d = vmemmap_p4d_populate(pgd, addr, node);
+		if (!p4d)
+			return -ENOMEM;
+		pud = vmemmap_pud_populate(p4d, addr, node);
+		if (!pud)
+			return -ENOMEM;
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd)) {
+			void *p = NULL;
+
+			p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
+			if (p) {
+				pmd_t entry;
+
+				entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+				pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+				set_pmd_at(&init_mm, addr, pmd, entry);
+
+				continue;
+			}
+		} else if (pmd_val(*pmd) & _PAGE_HUGE) {
+			vmemmap_verify((pte_t *)pmd, node, addr, next);
+			continue;
+		}
+		if (vmemmap_populate_basepages(addr, next, node, NULL))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+#if CONFIG_PGTABLE_LEVELS == 2
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+		struct vmem_altmap *altmap)
+{
+	return vmemmap_populate_basepages(start, end, node, NULL);
+}
+#else
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+		struct vmem_altmap *altmap)
+{
+	return vmemmap_populate_hugepages(start, end, node, NULL);
+}
+#endif
+
+void vmemmap_free(unsigned long start, unsigned long end,
+		struct vmem_altmap *altmap)
+{
+}
+#endif
+
 /*
  * Align swapper_pg_dir in to 64K, allows its address to be loaded
  * with a single LUI instruction in the TLB handlers.  If we used
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc8f326be0ce..3472b924a1ea 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3203,6 +3203,8 @@ void *sparse_buffer_alloc(unsigned long size);
 struct page * __populate_section_memmap(unsigned long pfn,
 		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
 		struct dev_pagemap *pgmap);
+void pmd_init(void *addr);
+void pud_init(void *addr);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index f4fa61dbbee3..33e2a1ceee72 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -587,6 +587,10 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
 	return pmd;
 }
 
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
 pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
 {
 	pud_t *pud = pud_offset(p4d, addr);
@@ -594,11 +598,16 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
 		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
 		if (!p)
 			return NULL;
+		pmd_init(p);
 		pud_populate(&init_mm, pud, p);
 	}
 	return pud;
 }
 
+void __weak __meminit pud_init(void *addr)
+{
+}
+
 p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
 {
 	p4d_t *p4d = p4d_offset(pgd, addr);
@@ -606,6 +615,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
 		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
 		if (!p)
 			return NULL;
+		pud_init(p);
 		p4d_populate(&init_mm, p4d, p);
 	}
 	return p4d;
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] mm/sparse-vmemmap: Generalise vmemmap_populate_hugepages()
  2022-06-25  9:54 [PATCH 1/3] MIPS&LoongArch: Adjust prototypes of p?d_init() Huacai Chen
  2022-06-25  9:54 ` [PATCH 2/3] LoongArch: Add sparse memory vmemmap support Huacai Chen
@ 2022-06-25  9:54 ` Huacai Chen
  1 sibling, 0 replies; 5+ messages in thread
From: Huacai Chen @ 2022-06-25  9:54 UTC (permalink / raw)
  To: Arnd Bergmann, Huacai Chen, Thomas Bogendoerfer, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, Catalin Marinas, Will Deacon
  Cc: loongarch, linux-arch, Xuefeng Li, Guo Ren, Xuerui Wang,
	Jiaxun Yang, Andrew Morton, linux-mm, linux-mips, linux-kernel,
	linux-arm-kernel, Feiyang Chen, Huacai Chen

From: Feiyang Chen <chenfeiyang@loongson.cn>

Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can
share its implementation.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
---
 arch/arm64/mm/mmu.c      | 53 ++++++-----------------
 arch/loongarch/mm/init.c | 63 ++++++++-------------------
 arch/x86/mm/init_64.c    | 92 ++++++++++++++--------------------------
 include/linux/mm.h       |  6 +++
 mm/sparse-vmemmap.c      | 54 +++++++++++++++++++++++
 5 files changed, 124 insertions(+), 144 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 626ec32873c6..b080a65c719d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1158,49 +1158,24 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	return vmemmap_populate_basepages(start, end, node, altmap);
 }
 #else	/* !ARM64_KERNEL_USES_PMD_MAPS */
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+			       unsigned long addr, unsigned long next)
+{
+	pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+				unsigned long next)
+{
+	vmemmap_verify((pte_t *)pmd, node, addr, next);
+	return 1;
+}
+
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
-	unsigned long addr = start;
-	unsigned long next;
-	pgd_t *pgdp;
-	p4d_t *p4dp;
-	pud_t *pudp;
-	pmd_t *pmdp;
-
 	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
-	do {
-		next = pmd_addr_end(addr, end);
-
-		pgdp = vmemmap_pgd_populate(addr, node);
-		if (!pgdp)
-			return -ENOMEM;
-
-		p4dp = vmemmap_p4d_populate(pgdp, addr, node);
-		if (!p4dp)
-			return -ENOMEM;
-
-		pudp = vmemmap_pud_populate(p4dp, addr, node);
-		if (!pudp)
-			return -ENOMEM;
-
-		pmdp = pmd_offset(pudp, addr);
-		if (pmd_none(READ_ONCE(*pmdp))) {
-			void *p = NULL;
-
-			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
-			if (!p) {
-				if (vmemmap_populate_basepages(addr, next, node, altmap))
-					return -ENOMEM;
-				continue;
-			}
-
-			pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
-		} else
-			vmemmap_verify((pte_t *)pmdp, node, addr, next);
-	} while (addr = next, addr != end);
-
-	return 0;
+	return vmemmap_populate_hugepages(start, end, node, altmap);
 }
 #endif	/* !ARM64_KERNEL_USES_PMD_MAPS */
 
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 35128229fe46..3190b3cd52d1 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -158,52 +158,25 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 #endif
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
-					 int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+			       unsigned long addr, unsigned long next)
 {
-	unsigned long addr = start;
-	unsigned long next;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	for (addr = start; addr < end; addr = next) {
-		next = pmd_addr_end(addr, end);
-
-		pgd = vmemmap_pgd_populate(addr, node);
-		if (!pgd)
-			return -ENOMEM;
-		p4d = vmemmap_p4d_populate(pgd, addr, node);
-		if (!p4d)
-			return -ENOMEM;
-		pud = vmemmap_pud_populate(p4d, addr, node);
-		if (!pud)
-			return -ENOMEM;
-
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			void *p = NULL;
-
-			p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
-			if (p) {
-				pmd_t entry;
-
-				entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
-				pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
-				set_pmd_at(&init_mm, addr, pmd, entry);
-
-				continue;
-			}
-		} else if (pmd_val(*pmd) & _PAGE_HUGE) {
-			vmemmap_verify((pte_t *)pmd, node, addr, next);
-			continue;
-		}
-		if (vmemmap_populate_basepages(addr, next, node, NULL))
-			return -ENOMEM;
-	}
-
-	return 0;
+	pmd_t entry;
+
+	entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+	pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+	set_pmd_at(&init_mm, addr, pmd, entry);
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+				unsigned long next)
+{
+	int huge = pmd_val(*pmd) & _PAGE_HUGE;
+
+	if (huge)
+		vmemmap_verify((pte_t *)pmd, node, addr, next);
+
+	return huge;
 }
 
 #if CONFIG_PGTABLE_LEVELS == 2
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 39c5246964a9..4911093ee2f3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1532,72 +1532,44 @@ static long __meminitdata addr_start, addr_end;
 static void __meminitdata *p_start, *p_end;
 static int __meminitdata node_start;
 
-static int __meminit vmemmap_populate_hugepages(unsigned long start,
-		unsigned long end, int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+			       unsigned long addr, unsigned long next)
 {
-	unsigned long addr;
-	unsigned long next;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	for (addr = start; addr < end; addr = next) {
-		next = pmd_addr_end(addr, end);
-
-		pgd = vmemmap_pgd_populate(addr, node);
-		if (!pgd)
-			return -ENOMEM;
-
-		p4d = vmemmap_p4d_populate(pgd, addr, node);
-		if (!p4d)
-			return -ENOMEM;
-
-		pud = vmemmap_pud_populate(p4d, addr, node);
-		if (!pud)
-			return -ENOMEM;
-
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			void *p;
-
-			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
-			if (p) {
-				pte_t entry;
-
-				entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
-						PAGE_KERNEL_LARGE);
-				set_pmd(pmd, __pmd(pte_val(entry)));
+	pte_t entry;
+
+	entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
+			PAGE_KERNEL_LARGE);
+	set_pmd(pmd, __pmd(pte_val(entry)));
+
+	/* check to see if we have contiguous blocks */
+	if (p_end != p || node_start != node) {
+		if (p_start)
+			pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+				addr_start, addr_end-1, p_start, p_end-1, node_start);
+		addr_start = addr;
+		node_start = node;
+		p_start = p;
+	}
 
-				/* check to see if we have contiguous blocks */
-				if (p_end != p || node_start != node) {
-					if (p_start)
-						pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
-						       addr_start, addr_end-1, p_start, p_end-1, node_start);
-					addr_start = addr;
-					node_start = node;
-					p_start = p;
-				}
+	addr_end = addr + PMD_SIZE;
+	p_end = p + PMD_SIZE;
 
-				addr_end = addr + PMD_SIZE;
-				p_end = p + PMD_SIZE;
+	if (!IS_ALIGNED(addr, PMD_SIZE) ||
+		!IS_ALIGNED(next, PMD_SIZE))
+		vmemmap_use_new_sub_pmd(addr, next);
+}
 
-				if (!IS_ALIGNED(addr, PMD_SIZE) ||
-				    !IS_ALIGNED(next, PMD_SIZE))
-					vmemmap_use_new_sub_pmd(addr, next);
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+				unsigned long next)
+{
+	int large = pmd_large(*pmd);
 
-				continue;
-			} else if (altmap)
-				return -ENOMEM; /* no fallback */
-		} else if (pmd_large(*pmd)) {
-			vmemmap_verify((pte_t *)pmd, node, addr, next);
-			vmemmap_use_sub_pmd(addr, next);
-			continue;
-		}
-		if (vmemmap_populate_basepages(addr, next, node, NULL))
-			return -ENOMEM;
+	if (pmd_large(*pmd)) {
+		vmemmap_verify((pte_t *)pmd, node, addr, next);
+		vmemmap_use_sub_pmd(addr, next);
 	}
-	return 0;
+
+	return large;
 }
 
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3472b924a1ea..45df4b7562bc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3216,8 +3216,14 @@ struct vmem_altmap;
 void *vmemmap_alloc_block_buf(unsigned long size, int node,
 			      struct vmem_altmap *altmap);
 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
+void vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+		     unsigned long addr, unsigned long next);
+int vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+		      unsigned long next);
 int vmemmap_populate_basepages(unsigned long start, unsigned long end,
 			       int node, struct vmem_altmap *altmap);
+int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+			       int node, struct vmem_altmap *altmap);
 int vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap);
 void vmemmap_populate_print_last(void);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 33e2a1ceee72..6f2e40bb695d 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -686,6 +686,60 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
 	return vmemmap_populate_range(start, end, node, altmap, NULL);
 }
 
+void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+				      unsigned long addr, unsigned long next)
+{
+}
+
+int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr,
+				       unsigned long next)
+{
+	return 0;
+}
+
+int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+					 int node, struct vmem_altmap *altmap)
+{
+	unsigned long addr;
+	unsigned long next;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	for (addr = start; addr < end; addr = next) {
+		next = pmd_addr_end(addr, end);
+
+		pgd = vmemmap_pgd_populate(addr, node);
+		if (!pgd)
+			return -ENOMEM;
+
+		p4d = vmemmap_p4d_populate(pgd, addr, node);
+		if (!p4d)
+			return -ENOMEM;
+
+		pud = vmemmap_pud_populate(p4d, addr, node);
+		if (!pud)
+			return -ENOMEM;
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(READ_ONCE(*pmd))) {
+			void *p;
+
+			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+			if (p) {
+				vmemmap_set_pmd(pmd, p, node, addr, next);
+				continue;
+			} else if (altmap)
+				return -ENOMEM; /* no fallback */
+		} else if (vmemmap_check_pmd(pmd, node, addr, next))
+			continue;
+		if (vmemmap_populate_basepages(addr, next, node, altmap))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
 /*
  * For compound pages bigger than section size (e.g. x86 1G compound
  * pages with 2M subsection size) fill the rest of sections as tail
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add sparse memory vmemmap support
  2022-06-25  9:54 ` [PATCH 2/3] LoongArch: Add sparse memory vmemmap support Huacai Chen
@ 2022-06-27 10:33   ` Muchun Song
  2022-06-28  3:22     ` Huacai Chen
  0 siblings, 1 reply; 5+ messages in thread
From: Muchun Song @ 2022-06-27 10:33 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Arnd Bergmann, Huacai Chen, Thomas Bogendoerfer, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, Catalin Marinas, Will Deacon,
	loongarch, linux-arch, Xuefeng Li, Guo Ren, Xuerui Wang,
	Jiaxun Yang, Andrew Morton, Linux Memory Management List,
	linux-mips, LKML, LAK, Feiyang Chen, Min Zhou

On Sat, Jun 25, 2022 at 5:54 PM Huacai Chen <chenhuacai@loongson.cn> wrote:
>
> From: Feiyang Chen <chenfeiyang@loongson.cn>
>
> Add sparse memory vmemmap support for LoongArch. SPARSEMEM_VMEMMAP
> uses a virtually mapped memmap to optimise pfn_to_page and page_to_pfn
> operations. This is the most efficient option when sufficient kernel
> resources are available.
>
> Signed-off-by: Min Zhou <zhoumin@loongson.cn>
> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
> Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
> ---
>  arch/loongarch/Kconfig                 |  2 +
>  arch/loongarch/include/asm/pgtable.h   |  5 +-
>  arch/loongarch/include/asm/sparsemem.h |  8 +++
>  arch/loongarch/mm/init.c               | 71 +++++++++++++++++++++++++-
>  include/linux/mm.h                     |  2 +
>  mm/sparse-vmemmap.c                    | 10 ++++
>  6 files changed, 96 insertions(+), 2 deletions(-)
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index dc19cf3071ea..8e56ca28165e 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -49,6 +49,7 @@ config LOONGARCH
>         select ARCH_USE_QUEUED_RWLOCKS
>         select ARCH_USE_QUEUED_SPINLOCKS
>         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> +       select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP

I think this should be a separate patch to enable HVO (HugeTLB Vmemmap
Optimization) since it is irrelevant to this patch.

Thanks.

>         select ARCH_WANTS_NO_INSTR
>         select BUILDTIME_TABLE_SORT
>         select COMMON_CLK
> @@ -422,6 +423,7 @@ config ARCH_FLATMEM_ENABLE
>
>  config ARCH_SPARSEMEM_ENABLE
>         def_bool y
> +       select SPARSEMEM_VMEMMAP_ENABLE
>         help
>           Say Y to support efficient handling of sparse physical memory,
>           for architectures which are either NUMA (Non-Uniform Memory Access)
> diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
> index 9c811c3f7572..b701ec7a0309 100644
> --- a/arch/loongarch/include/asm/pgtable.h
> +++ b/arch/loongarch/include/asm/pgtable.h
> @@ -92,7 +92,10 @@ extern unsigned long zero_page_mask;
>  #define VMALLOC_START  MODULES_END
>  #define VMALLOC_END    \
>         (vm_map_base +  \
> -        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE)
> +        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
> +
> +#define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
> +#define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
>
>  #define pte_ERROR(e) \
>         pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
> diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h
> index 3d18cdf1b069..a1e440f6bec7 100644
> --- a/arch/loongarch/include/asm/sparsemem.h
> +++ b/arch/loongarch/include/asm/sparsemem.h
> @@ -11,6 +11,14 @@
>  #define SECTION_SIZE_BITS      29 /* 2^29 = Largest Huge Page Size */
>  #define MAX_PHYSMEM_BITS       48
>
> +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> +#define VMEMMAP_SIZE   0
> +#else
> +#define VMEMMAP_SIZE   (sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT)))
> +#endif
> +
> +#include <linux/mm_types.h>
> +
>  #endif /* CONFIG_SPARSEMEM */
>
>  #ifdef CONFIG_MEMORY_HOTPLUG
> diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
> index 7094a68c9b83..35128229fe46 100644
> --- a/arch/loongarch/mm/init.c
> +++ b/arch/loongarch/mm/init.c
> @@ -22,7 +22,7 @@
>  #include <linux/pfn.h>
>  #include <linux/hardirq.h>
>  #include <linux/gfp.h>
> -#include <linux/initrd.h>
> +#include <linux/hugetlb.h>
>  #include <linux/mmzone.h>
>
>  #include <asm/asm-offsets.h>
> @@ -157,6 +157,75 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
>  #endif
>  #endif
>
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> +                                        int node, struct vmem_altmap *altmap)
> +{
> +       unsigned long addr = start;
> +       unsigned long next;
> +       pgd_t *pgd;
> +       p4d_t *p4d;
> +       pud_t *pud;
> +       pmd_t *pmd;
> +
> +       for (addr = start; addr < end; addr = next) {
> +               next = pmd_addr_end(addr, end);
> +
> +               pgd = vmemmap_pgd_populate(addr, node);
> +               if (!pgd)
> +                       return -ENOMEM;
> +               p4d = vmemmap_p4d_populate(pgd, addr, node);
> +               if (!p4d)
> +                       return -ENOMEM;
> +               pud = vmemmap_pud_populate(p4d, addr, node);
> +               if (!pud)
> +                       return -ENOMEM;
> +
> +               pmd = pmd_offset(pud, addr);
> +               if (pmd_none(*pmd)) {
> +                       void *p = NULL;
> +
> +                       p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
> +                       if (p) {
> +                               pmd_t entry;
> +
> +                               entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
> +                               pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
> +                               set_pmd_at(&init_mm, addr, pmd, entry);
> +
> +                               continue;
> +                       }
> +               } else if (pmd_val(*pmd) & _PAGE_HUGE) {
> +                       vmemmap_verify((pte_t *)pmd, node, addr, next);
> +                       continue;
> +               }
> +               if (vmemmap_populate_basepages(addr, next, node, NULL))
> +                       return -ENOMEM;
> +       }
> +
> +       return 0;
> +}
> +
> +#if CONFIG_PGTABLE_LEVELS == 2
> +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> +               struct vmem_altmap *altmap)
> +{
> +       return vmemmap_populate_basepages(start, end, node, NULL);
> +}
> +#else
> +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> +               struct vmem_altmap *altmap)
> +{
> +       return vmemmap_populate_hugepages(start, end, node, NULL);
> +}
> +#endif
> +
> +void vmemmap_free(unsigned long start, unsigned long end,
> +               struct vmem_altmap *altmap)
> +{
> +}
> +#endif
> +
>  /*
>   * Align swapper_pg_dir in to 64K, allows its address to be loaded
>   * with a single LUI instruction in the TLB handlers.  If we used
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index bc8f326be0ce..3472b924a1ea 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3203,6 +3203,8 @@ void *sparse_buffer_alloc(unsigned long size);
>  struct page * __populate_section_memmap(unsigned long pfn,
>                 unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
>                 struct dev_pagemap *pgmap);
> +void pmd_init(void *addr);
> +void pud_init(void *addr);
>  pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
>  p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
>  pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
> diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> index f4fa61dbbee3..33e2a1ceee72 100644
> --- a/mm/sparse-vmemmap.c
> +++ b/mm/sparse-vmemmap.c
> @@ -587,6 +587,10 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
>         return pmd;
>  }
>
> +void __weak __meminit pmd_init(void *addr)
> +{
> +}
> +
>  pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
>  {
>         pud_t *pud = pud_offset(p4d, addr);
> @@ -594,11 +598,16 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
>                 void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
>                 if (!p)
>                         return NULL;
> +               pmd_init(p);
>                 pud_populate(&init_mm, pud, p);
>         }
>         return pud;
>  }
>
> +void __weak __meminit pud_init(void *addr)
> +{
> +}
> +
>  p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
>  {
>         p4d_t *p4d = p4d_offset(pgd, addr);
> @@ -606,6 +615,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
>                 void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
>                 if (!p)
>                         return NULL;
> +               pud_init(p);
>                 p4d_populate(&init_mm, p4d, p);
>         }
>         return p4d;
> --
> 2.27.0
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/3] LoongArch: Add sparse memory vmemmap support
  2022-06-27 10:33   ` Muchun Song
@ 2022-06-28  3:22     ` Huacai Chen
  0 siblings, 0 replies; 5+ messages in thread
From: Huacai Chen @ 2022-06-28  3:22 UTC (permalink / raw)
  To: Muchun Song
  Cc: Huacai Chen, Arnd Bergmann, Thomas Bogendoerfer, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, Catalin Marinas, Will Deacon,
	loongarch, linux-arch, Xuefeng Li, Guo Ren, Xuerui Wang,
	Jiaxun Yang, Andrew Morton, Linux Memory Management List,
	open list:MIPS, LKML, LAK, Feiyang Chen, Min Zhou

Hi, Muchun,

On Mon, Jun 27, 2022 at 6:33 PM Muchun Song <songmuchun@bytedance.com> wrote:
>
> On Sat, Jun 25, 2022 at 5:54 PM Huacai Chen <chenhuacai@loongson.cn> wrote:
> >
> > From: Feiyang Chen <chenfeiyang@loongson.cn>
> >
> > Add sparse memory vmemmap support for LoongArch. SPARSEMEM_VMEMMAP
> > uses a virtually mapped memmap to optimise pfn_to_page and page_to_pfn
> > operations. This is the most efficient option when sufficient kernel
> > resources are available.
> >
> > Signed-off-by: Min Zhou <zhoumin@loongson.cn>
> > Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
> > Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
> > ---
> >  arch/loongarch/Kconfig                 |  2 +
> >  arch/loongarch/include/asm/pgtable.h   |  5 +-
> >  arch/loongarch/include/asm/sparsemem.h |  8 +++
> >  arch/loongarch/mm/init.c               | 71 +++++++++++++++++++++++++-
> >  include/linux/mm.h                     |  2 +
> >  mm/sparse-vmemmap.c                    | 10 ++++
> >  6 files changed, 96 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> > index dc19cf3071ea..8e56ca28165e 100644
> > --- a/arch/loongarch/Kconfig
> > +++ b/arch/loongarch/Kconfig
> > @@ -49,6 +49,7 @@ config LOONGARCH
> >         select ARCH_USE_QUEUED_RWLOCKS
> >         select ARCH_USE_QUEUED_SPINLOCKS
> >         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
> > +       select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
>
> I think this should be a separate patch to enable HVO (HugeTLB Vmemmap
> Optimization) since it is irrelevant to this patch.
It seems I have misunderstood HVO, so I will remove the HVO parts from
this patch. Thank you.

Huacai
>
> Thanks.
>
> >         select ARCH_WANTS_NO_INSTR
> >         select BUILDTIME_TABLE_SORT
> >         select COMMON_CLK
> > @@ -422,6 +423,7 @@ config ARCH_FLATMEM_ENABLE
> >
> >  config ARCH_SPARSEMEM_ENABLE
> >         def_bool y
> > +       select SPARSEMEM_VMEMMAP_ENABLE
> >         help
> >           Say Y to support efficient handling of sparse physical memory,
> >           for architectures which are either NUMA (Non-Uniform Memory Access)
> > diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
> > index 9c811c3f7572..b701ec7a0309 100644
> > --- a/arch/loongarch/include/asm/pgtable.h
> > +++ b/arch/loongarch/include/asm/pgtable.h
> > @@ -92,7 +92,10 @@ extern unsigned long zero_page_mask;
> >  #define VMALLOC_START  MODULES_END
> >  #define VMALLOC_END    \
> >         (vm_map_base +  \
> > -        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE)
> > +        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
> > +
> > +#define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
> > +#define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
> >
> >  #define pte_ERROR(e) \
> >         pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
> > diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h
> > index 3d18cdf1b069..a1e440f6bec7 100644
> > --- a/arch/loongarch/include/asm/sparsemem.h
> > +++ b/arch/loongarch/include/asm/sparsemem.h
> > @@ -11,6 +11,14 @@
> >  #define SECTION_SIZE_BITS      29 /* 2^29 = Largest Huge Page Size */
> >  #define MAX_PHYSMEM_BITS       48
> >
> > +#ifndef CONFIG_SPARSEMEM_VMEMMAP
> > +#define VMEMMAP_SIZE   0
> > +#else
> > +#define VMEMMAP_SIZE   (sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT)))
> > +#endif
> > +
> > +#include <linux/mm_types.h>
> > +
> >  #endif /* CONFIG_SPARSEMEM */
> >
> >  #ifdef CONFIG_MEMORY_HOTPLUG
> > diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
> > index 7094a68c9b83..35128229fe46 100644
> > --- a/arch/loongarch/mm/init.c
> > +++ b/arch/loongarch/mm/init.c
> > @@ -22,7 +22,7 @@
> >  #include <linux/pfn.h>
> >  #include <linux/hardirq.h>
> >  #include <linux/gfp.h>
> > -#include <linux/initrd.h>
> > +#include <linux/hugetlb.h>
> >  #include <linux/mmzone.h>
> >
> >  #include <asm/asm-offsets.h>
> > @@ -157,6 +157,75 @@ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
> >  #endif
> >  #endif
> >
> > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> > +int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
> > +                                        int node, struct vmem_altmap *altmap)
> > +{
> > +       unsigned long addr = start;
> > +       unsigned long next;
> > +       pgd_t *pgd;
> > +       p4d_t *p4d;
> > +       pud_t *pud;
> > +       pmd_t *pmd;
> > +
> > +       for (addr = start; addr < end; addr = next) {
> > +               next = pmd_addr_end(addr, end);
> > +
> > +               pgd = vmemmap_pgd_populate(addr, node);
> > +               if (!pgd)
> > +                       return -ENOMEM;
> > +               p4d = vmemmap_p4d_populate(pgd, addr, node);
> > +               if (!p4d)
> > +                       return -ENOMEM;
> > +               pud = vmemmap_pud_populate(p4d, addr, node);
> > +               if (!pud)
> > +                       return -ENOMEM;
> > +
> > +               pmd = pmd_offset(pud, addr);
> > +               if (pmd_none(*pmd)) {
> > +                       void *p = NULL;
> > +
> > +                       p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
> > +                       if (p) {
> > +                               pmd_t entry;
> > +
> > +                               entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
> > +                               pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
> > +                               set_pmd_at(&init_mm, addr, pmd, entry);
> > +
> > +                               continue;
> > +                       }
> > +               } else if (pmd_val(*pmd) & _PAGE_HUGE) {
> > +                       vmemmap_verify((pte_t *)pmd, node, addr, next);
> > +                       continue;
> > +               }
> > +               if (vmemmap_populate_basepages(addr, next, node, NULL))
> > +                       return -ENOMEM;
> > +       }
> > +
> > +       return 0;
> > +}
> > +
> > +#if CONFIG_PGTABLE_LEVELS == 2
> > +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> > +               struct vmem_altmap *altmap)
> > +{
> > +       return vmemmap_populate_basepages(start, end, node, NULL);
> > +}
> > +#else
> > +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> > +               struct vmem_altmap *altmap)
> > +{
> > +       return vmemmap_populate_hugepages(start, end, node, NULL);
> > +}
> > +#endif
> > +
> > +void vmemmap_free(unsigned long start, unsigned long end,
> > +               struct vmem_altmap *altmap)
> > +{
> > +}
> > +#endif
> > +
> >  /*
> >   * Align swapper_pg_dir in to 64K, allows its address to be loaded
> >   * with a single LUI instruction in the TLB handlers.  If we used
> > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > index bc8f326be0ce..3472b924a1ea 100644
> > --- a/include/linux/mm.h
> > +++ b/include/linux/mm.h
> > @@ -3203,6 +3203,8 @@ void *sparse_buffer_alloc(unsigned long size);
> >  struct page * __populate_section_memmap(unsigned long pfn,
> >                 unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
> >                 struct dev_pagemap *pgmap);
> > +void pmd_init(void *addr);
> > +void pud_init(void *addr);
> >  pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
> >  p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
> >  pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
> > diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
> > index f4fa61dbbee3..33e2a1ceee72 100644
> > --- a/mm/sparse-vmemmap.c
> > +++ b/mm/sparse-vmemmap.c
> > @@ -587,6 +587,10 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
> >         return pmd;
> >  }
> >
> > +void __weak __meminit pmd_init(void *addr)
> > +{
> > +}
> > +
> >  pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
> >  {
> >         pud_t *pud = pud_offset(p4d, addr);
> > @@ -594,11 +598,16 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
> >                 void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
> >                 if (!p)
> >                         return NULL;
> > +               pmd_init(p);
> >                 pud_populate(&init_mm, pud, p);
> >         }
> >         return pud;
> >  }
> >
> > +void __weak __meminit pud_init(void *addr)
> > +{
> > +}
> > +
> >  p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
> >  {
> >         p4d_t *p4d = p4d_offset(pgd, addr);
> > @@ -606,6 +615,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
> >                 void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
> >                 if (!p)
> >                         return NULL;
> > +               pud_init(p);
> >                 p4d_populate(&init_mm, p4d, p);
> >         }
> >         return p4d;
> > --
> > 2.27.0
> >

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-06-28  3:22 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-25  9:54 [PATCH 1/3] MIPS&LoongArch: Adjust prototypes of p?d_init() Huacai Chen
2022-06-25  9:54 ` [PATCH 2/3] LoongArch: Add sparse memory vmemmap support Huacai Chen
2022-06-27 10:33   ` Muchun Song
2022-06-28  3:22     ` Huacai Chen
2022-06-25  9:54 ` [PATCH 3/3] mm/sparse-vmemmap: Generalise vmemmap_populate_hugepages() Huacai Chen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).