All of lore.kernel.org
 help / color / mirror / Atom feed
* [RESEND PATCH v1 0/2] Add Sv57 page table support
@ 2021-11-24 11:20 panqinglin2020
  2021-11-24 11:20 ` [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation panqinglin2020
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: panqinglin2020 @ 2021-11-24 11:20 UTC (permalink / raw)
  To: paul.walmsley, palmer, aou, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan

From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

This patchset is based on Alex's Sv48 patchset v2.

This implements Sv57 support at runtime. The kernel will first try to boot with
a 5-level page table when the mmu-type field in the dtb is "riscv,sv57" or absent,
and will fall back to 4-level if the HW does not support it.

Tested on:
  - qemu rv64 sv39
  - qemu rv64 sv48
  - qemu rv64 sv57
  - SiFive Unmatched

Qinglin Pan (2):
  riscv,mm: Add Sv57 support based on Sv48 implementation
  Documentation, mm: Add Sv57 vm layout documentation

 Documentation/riscv/vm-layout.rst   |  36 ++++++
 arch/riscv/Kconfig                  |   4 +-
 arch/riscv/include/asm/csr.h        |   1 +
 arch/riscv/include/asm/fixmap.h     |   1 +
 arch/riscv/include/asm/page.h       |   1 +
 arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
 arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
 arch/riscv/include/asm/pgtable.h    |   4 +-
 arch/riscv/kernel/cpu.c             |   4 +-
 arch/riscv/mm/init.c                | 169 +++++++++++++++++++++++++---
 10 files changed, 348 insertions(+), 24 deletions(-)

-- 
2.32.0


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation
  2021-11-24 11:20 [RESEND PATCH v1 0/2] Add Sv57 page table support panqinglin2020
@ 2021-11-24 11:20 ` panqinglin2020
  2021-11-29 11:20   ` Alexandre ghiti
  2021-11-24 11:20 ` [PATCH v1 2/2] Documentation, mm: Add Sv57 vm layout documentation panqinglin2020
  2021-11-24 23:29 ` [RESEND PATCH v1 0/2] Add Sv57 page table support Heiko Stübner
  2 siblings, 1 reply; 9+ messages in thread
From: panqinglin2020 @ 2021-11-24 11:20 UTC (permalink / raw)
  To: paul.walmsley, palmer, aou, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan

From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

This patch adds an Sv57 implementation on top of Alex's Sv48 patchset.
The mmu configuration is determined at runtime, according to both the
mmu HW support and the mmu-type field in the dtb. On 64-bit, the kernel
will try each satp mode in turn, from the configured mode down to Sv39.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
---
 arch/riscv/Kconfig                  |   4 +-
 arch/riscv/include/asm/csr.h        |   1 +
 arch/riscv/include/asm/fixmap.h     |   1 +
 arch/riscv/include/asm/page.h       |   1 +
 arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
 arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
 arch/riscv/include/asm/pgtable.h    |   4 +-
 arch/riscv/kernel/cpu.c             |   4 +-
 arch/riscv/mm/init.c                | 169 +++++++++++++++++++++++++---
 9 files changed, 312 insertions(+), 24 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a4cadcd4e785..aac28e96d0e5 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -151,7 +151,7 @@ config PAGE_OFFSET
 	hex
 	default 0xC0000000 if 32BIT
 	default 0x80000000 if 64BIT && !MMU
-	default 0xffffc00000000000 if 64BIT
+	default 0xff80000000000000 if 64BIT
 
 config ARCH_FLATMEM_ENABLE
 	def_bool !NUMA
@@ -196,7 +196,7 @@ config FIX_EARLYCON_MEM
 
 config PGTABLE_LEVELS
 	int
-	default 4 if 64BIT
+	default 5 if 64BIT
 	default 2
 
 config LOCKDEP_SUPPORT
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index ae711692eec9..299abdef0cd6 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -47,6 +47,7 @@
 #define SATP_PPN	_AC(0x00000FFFFFFFFFFF, UL)
 #define SATP_MODE_39	_AC(0x8000000000000000, UL)
 #define SATP_MODE_48	_AC(0x9000000000000000, UL)
+#define SATP_MODE_57	_AC(0xa000000000000000, UL)
 #define SATP_ASID_BITS	16
 #define SATP_ASID_SHIFT	44
 #define SATP_ASID_MASK	_AC(0xFFFF, UL)
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 58a718573ad6..3cfece8b6568 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -25,6 +25,7 @@ enum fixed_addresses {
 	FIX_PTE,
 	FIX_PMD,
 	FIX_PUD,
+	FIX_P4D,
 	FIX_TEXT_POKE1,
 	FIX_TEXT_POKE0,
 	FIX_EARLYCON_MEM_BASE,
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 63334568a10e..41e0d88234d5 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -37,6 +37,7 @@
  * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
  * define the PAGE_OFFSET value for SV39.
  */
+#define PAGE_OFFSET_L4		_AC(0xffffc00000000000, UL)
 #define PAGE_OFFSET_L3		_AC(0xffffffe000000000, UL)
 #else
 #define PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 11823004b87a..947f23d7b6af 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
 	}
 }
 
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+	if (pgtable_l5_enabled) {
+		unsigned long pfn = virt_to_pfn(p4d);
+
+		set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+	}
+}
+
+static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
+				     p4d_t *p4d)
+{
+	if (pgtable_l5_enabled) {
+		unsigned long pfn = virt_to_pfn(p4d);
+
+		set_pgd_safe(pgd,
+			     __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+	}
+}
+
 #define pud_alloc_one pud_alloc_one
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 }
 
 #define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
+
+#define p4d_alloc_one p4d_alloc_one
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	if (pgtable_l5_enabled) {
+		gfp_t gfp = GFP_PGTABLE_USER;
+
+		if (mm == &init_mm)
+			gfp = GFP_PGTABLE_KERNEL;
+		return (p4d_t *)get_zeroed_page(gfp);
+	}
+
+	return NULL;
+}
+
+static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+	BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+	free_page((unsigned long)p4d);
+}
+
+#define p4d_free p4d_free
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+	if (pgtable_l5_enabled)
+		__p4d_free(mm, p4d);
+}
+
+#define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
 #endif /* __PAGETABLE_PMD_FOLDED */
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index bbbdd66e5e2f..a01386d4094f 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -9,16 +9,24 @@
 #include <linux/const.h>
 
 extern bool pgtable_l4_enabled;
+extern bool pgtable_l5_enabled;
 
 #define PGDIR_SHIFT_L3  30
 #define PGDIR_SHIFT_L4  39
+#define PGDIR_SHIFT_L5  48
 #define PGDIR_SIZE_L3   (_AC(1, UL) << PGDIR_SHIFT_L3)
 
-#define PGDIR_SHIFT     (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
+#define PGDIR_SHIFT     (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
+		(pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
 /* Size of region mapped by a page global directory */
 #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
 
+/* p4d is folded into pgd in case of 4-level page table */
+#define P4D_SHIFT      39
+#define P4D_SIZE       (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK       (~(P4D_SIZE - 1))
+
 /* pud is folded into pgd in case of 3-level page table */
 #define PUD_SHIFT      30
 #define PUD_SIZE       (_AC(1, UL) << PUD_SHIFT)
@@ -29,6 +37,15 @@ extern bool pgtable_l4_enabled;
 #define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK        (~(PMD_SIZE - 1))
 
+/* Page 4th Directory entry */
+typedef struct {
+	unsigned long p4d;
+} p4d_t;
+
+#define p4d_val(x)	((x).p4d)
+#define __p4d(x)	((p4d_t) { (x) })
+#define PTRS_PER_P4D	(PAGE_SIZE / sizeof(p4d_t))
+
 /* Page Upper Directory entry */
 typedef struct {
 	unsigned long pud;
@@ -99,6 +116,15 @@ static inline struct page *pud_page(pud_t pud)
 	return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
 }
 
+#define mm_p4d_folded  mm_p4d_folded
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+	if (pgtable_l5_enabled)
+		return false;
+
+	return true;
+}
+
 #define mm_pud_folded  mm_pud_folded
 static inline bool mm_pud_folded(struct mm_struct *mm)
 {
@@ -128,6 +154,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
 #define pud_ERROR(e)   \
 	pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
 
+#define p4d_ERROR(e)   \
+	pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
+
 static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
 	if (pgtable_l4_enabled)
@@ -166,6 +195,16 @@ static inline void p4d_clear(p4d_t *p4d)
 		set_p4d(p4d, __p4d(0));
 }
 
+static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
+{
+	return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _p4d_pfn(p4d_t p4d)
+{
+	return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
+}
+
 static inline pud_t *p4d_pgtable(p4d_t p4d)
 {
 	if (pgtable_l4_enabled)
@@ -190,4 +229,66 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 	return (pud_t *)p4d;
 }
 
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		*pgdp = pgd;
+	else
+		set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		return (pgd_val(pgd) == 0);
+
+	return 0;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		return (pgd_val(pgd) & _PAGE_PRESENT);
+
+	return 1;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		return !pgd_present(pgd);
+
+	return 0;
+}
+
+static inline void pgd_clear(pgd_t *pgd)
+{
+	if (pgtable_l5_enabled)
+		set_pgd(pgd, __pgd(0));
+}
+
+static inline p4d_t *pgd_pgtable(pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+
+	return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
+}
+
+static inline struct page *pgd_page(pgd_t pgd)
+{
+	return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+}
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+#define p4d_offset p4d_offset
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+	if (pgtable_l5_enabled)
+		return pgd_pgtable(*pgd) + p4d_index(address);
+
+	return (p4d_t *)pgd;
+}
+
 #endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 152fc5454c02..922cc436b36b 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -49,7 +49,8 @@
  * position vmemmap directly below the VMALLOC region.
  */
 #ifdef CONFIG_64BIT
-#define VA_BITS		(pgtable_l4_enabled ? 48 : 39)
+#define VA_BITS		(pgtable_l5_enabled ? \
+				57 : (pgtable_l4_enabled ? 48 : 39))
 #else
 #define VA_BITS		32
 #endif
@@ -89,7 +90,6 @@
 
 #ifndef __ASSEMBLY__
 
-#include <asm-generic/pgtable-nop4d.h>
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 #include <linux/mm_types.h>
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index dea9b1c31889..e190bd205376 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -78,7 +78,9 @@ static void print_mmu(struct seq_file *f)
 #if defined(CONFIG_32BIT)
 	strncpy(sv_type, "sv32", 5);
 #elif defined(CONFIG_64BIT)
-	if (pgtable_l4_enabled)
+	if (pgtable_l5_enabled)
+		strncpy(sv_type, "sv57", 5);
+	else if (pgtable_l4_enabled)
 		strncpy(sv_type, "sv48", 5);
 	else
 		strncpy(sv_type, "sv39", 5);
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 0945ac9ed682..81822db8dd10 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -38,12 +38,15 @@ EXPORT_SYMBOL(kernel_map);
 #endif
 
 #ifdef CONFIG_64BIT
-u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
+u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
 #else
 u64 satp_mode = SATP_MODE_32;
 #endif
 EXPORT_SYMBOL(satp_mode);
 
+bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL) ?
+				true : false;
+EXPORT_SYMBOL(pgtable_l5_enabled);
 bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL) ?
 				true : false;
 EXPORT_SYMBOL(pgtable_l4_enabled);
@@ -72,6 +75,8 @@ struct pt_alloc_ops {
 	phys_addr_t (*alloc_pmd)(uintptr_t va);
 	pud_t *(*get_pud_virt)(phys_addr_t pa);
 	phys_addr_t (*alloc_pud)(uintptr_t va);
+	p4d_t *(*get_p4d_virt)(phys_addr_t pa);
+	phys_addr_t (*alloc_p4d)(uintptr_t va);
 #endif
 };
 
@@ -285,6 +290,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
 
 pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
 static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 
@@ -374,6 +380,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 #define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
 #endif /* CONFIG_XIP_KERNEL */
 
+static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
+#define fixmap_p4d     ((p4d_t *)XIP_FIXUP(fixmap_p4d))
+#define early_p4d      ((p4d_t *)XIP_FIXUP(early_p4d))
+#endif /* CONFIG_XIP_KERNEL */
+
 static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
 static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
 static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
@@ -487,6 +503,44 @@ static phys_addr_t alloc_pud_late(uintptr_t va)
 	return __pa(vaddr);
 }
 
+static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
+{
+	return (p4d_t *)((uintptr_t)pa);
+}
+
+static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
+{
+	clear_fixmap(FIX_P4D);
+	return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
+}
+
+static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
+{
+	return (p4d_t *)__va(pa);
+}
+
+static phys_addr_t __init alloc_p4d_early(uintptr_t va)
+{
+	/* Only one P4D is available for early mapping */
+	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+
+	return (uintptr_t)early_p4d;
+}
+
+static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
+{
+	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_p4d_late(uintptr_t va)
+{
+	unsigned long vaddr;
+
+	vaddr = __get_free_page(GFP_KERNEL);
+	BUG_ON(!vaddr);
+	return __pa(vaddr);
+}
+
 static void __init create_pud_mapping(pud_t *pudp,
 				      uintptr_t va, phys_addr_t pa,
 				      phys_addr_t sz, pgprot_t prot)
@@ -514,21 +568,55 @@ static void __init create_pud_mapping(pud_t *pudp,
 	create_pmd_mapping(nextp, va, pa, sz, prot);
 }
 
-#define pgd_next_t		pud_t
-#define alloc_pgd_next(__va)	(pgtable_l4_enabled ?			\
-		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
-#define get_pgd_next_virt(__pa)	(pgtable_l4_enabled ?			\
-		pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
+static void __init create_p4d_mapping(p4d_t *p4dp,
+				      uintptr_t va, phys_addr_t pa,
+				      phys_addr_t sz, pgprot_t prot)
+{
+	pud_t *nextp;
+	phys_addr_t next_phys;
+	uintptr_t p4d_index = p4d_index(va);
+
+	if (sz == P4D_SIZE) {
+		if (p4d_val(p4dp[p4d_index]) == 0)
+			p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
+		return;
+	}
+
+	if (p4d_val(p4dp[p4d_index]) == 0) {
+		next_phys = pt_ops.alloc_pud(va);
+		p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
+		nextp = pt_ops.get_pud_virt(next_phys);
+		memset(nextp, 0, PAGE_SIZE);
+	} else {
+		next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
+		nextp = pt_ops.get_pud_virt(next_phys);
+	}
+
+	create_pud_mapping(nextp, va, pa, sz, prot);
+}
+
+#define pgd_next_t		p4d_t
+#define alloc_pgd_next(__va)	(pgtable_l5_enabled ?			\
+		pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ?		\
+		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
+#define get_pgd_next_virt(__pa)	(pgtable_l5_enabled ?			\
+		pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ?	\
+		pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
+				(pgtable_l5_enabled ?			\
+		create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
 				(pgtable_l4_enabled ?			\
-		create_pud_mapping(__nextp, __va, __pa, __sz, __prot) :	\
-		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
-#define fixmap_pgd_next		(pgtable_l4_enabled ?			\
-		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
-#define trampoline_pgd_next	(pgtable_l4_enabled ?			\
-		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
-#define early_dtb_pgd_next	(pgtable_l4_enabled ?			\
-		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
+		create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) :	\
+		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
+#define fixmap_pgd_next		(pgtable_l5_enabled ?			\
+		(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ?		\
+		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
+#define trampoline_pgd_next	(pgtable_l5_enabled ?			\
+		(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ?	\
+		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
+#define early_dtb_pgd_next	(pgtable_l5_enabled ?			\
+		(uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ?	\
+		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
 #else
 #define pgd_next_t		pte_t
 #define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
@@ -537,6 +625,7 @@ static void __init create_pud_mapping(pud_t *pudp,
 	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
 #define fixmap_pgd_next		((uintptr_t)fixmap_pte)
 #define early_dtb_pgd_next	((uintptr_t)early_dtb_pmd)
+#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot)
 #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
 #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
 #endif /* __PAGETABLE_PMD_FOLDED */
@@ -627,6 +716,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
 #ifdef CONFIG_64BIT
+static void __init disable_pgtable_l5(void)
+{
+	pgtable_l5_enabled = false;
+	kernel_map.page_offset = PAGE_OFFSET_L4;
+	satp_mode = SATP_MODE_48;
+}
+
 static void __init disable_pgtable_l4(void)
 {
 	pgtable_l4_enabled = false;
@@ -643,8 +739,9 @@ static void __init disable_pgtable_l4(void)
 static __init void set_satp_mode(uintptr_t dtb_pa)
 {
 	u64 identity_satp, hw_satp;
-	uintptr_t set_satp_mode_pmd;
+	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
 	int cpus_node;
+	bool check_l4 = false;
 
 	/* Check if the user asked for sv39 explicitly in the device tree */
 	cpus_node = fdt_path_offset((void *)dtb_pa, "/cpus");
@@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
 				continue;
 
 			if (!strcmp(mmu_type, "riscv,sv39")) {
+				disable_pgtable_l5();
 				disable_pgtable_l4();
 				return;
 			}
 
+			if (!strcmp(mmu_type, "riscv,sv48")) {
+				check_l4 = true;
+			}
+
 			break;
 		}
 	}
 
-	set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
+retry:
+	if (check_l4)
+		disable_pgtable_l5();
+
 	create_pgd_mapping(early_pg_dir,
-			   set_satp_mode_pmd, (uintptr_t)early_pud,
+			   set_satp_mode_pmd,
+			   check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
 			   PGDIR_SIZE, PAGE_TABLE);
+	if (!check_l4)
+		create_p4d_mapping(early_p4d,
+				set_satp_mode_pmd, (uintptr_t)early_pud,
+				P4D_SIZE, PAGE_TABLE);
 	create_pud_mapping(early_pud,
 			   set_satp_mode_pmd, (uintptr_t)early_pmd,
 			   PUD_SIZE, PAGE_TABLE);
@@ -689,10 +799,16 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
 	hw_satp = csr_swap(CSR_SATP, 0ULL);
 	local_flush_tlb_all();
 
-	if (hw_satp != identity_satp)
+	if (hw_satp != identity_satp) {
+		if (!check_l4) {
+			check_l4 = true;
+			goto retry;
+		}
 		disable_pgtable_l4();
+	}
 
 	memset(early_pg_dir, 0, PAGE_SIZE);
+	memset(early_p4d, 0, PAGE_SIZE);
 	memset(early_pud, 0, PAGE_SIZE);
 	memset(early_pmd, 0, PAGE_SIZE);
 }
@@ -766,6 +882,10 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
 			   PGDIR_SIZE,
 			   IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
 
+	if (pgtable_l5_enabled)
+		create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
+				   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
+
 	if (pgtable_l4_enabled)
 		create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
 				   (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
@@ -802,6 +922,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	pt_ops.get_pmd_virt = get_pmd_virt_early;
 	pt_ops.alloc_pud = alloc_pud_early;
 	pt_ops.get_pud_virt = get_pud_virt_early;
+	pt_ops.alloc_p4d = alloc_p4d_early;
+	pt_ops.get_p4d_virt = get_p4d_virt_early;
 #endif
 
 	kernel_map.virt_addr = KERNEL_LINK_ADDR;
@@ -855,6 +977,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 			   fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
 
 #ifndef __PAGETABLE_PMD_FOLDED
+	/* Setup fixmap P4D and PUD */
+	if (pgtable_l5_enabled)
+		create_p4d_mapping(fixmap_p4d, FIXADDR_START,
+				   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
 	/* Setup fixmap PUD and PMD */
 	if (pgtable_l4_enabled)
 		create_pud_mapping(fixmap_pud, FIXADDR_START,
@@ -864,6 +990,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	/* Setup trampoline PGD and PMD */
 	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
 			   trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+	if (pgtable_l5_enabled)
+		create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
+				   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
 	if (pgtable_l4_enabled)
 		create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
 				   (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
@@ -938,6 +1067,8 @@ static void __init setup_vm_final(void)
 	pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
 	pt_ops.alloc_pud = alloc_pud_fixmap;
 	pt_ops.get_pud_virt = get_pud_virt_fixmap;
+	pt_ops.alloc_p4d = alloc_p4d_fixmap;
+	pt_ops.get_p4d_virt = get_p4d_virt_fixmap;
 #endif
 	/* Setup swapper PGD for fixmap */
 	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
@@ -985,6 +1116,8 @@ static void __init setup_vm_final(void)
 	pt_ops.get_pmd_virt = get_pmd_virt_late;
 	pt_ops.alloc_pud = alloc_pud_late;
 	pt_ops.get_pud_virt = get_pud_virt_late;
+	pt_ops.alloc_p4d = alloc_p4d_late;
+	pt_ops.get_p4d_virt = get_p4d_virt_late;
 #endif
 }
 #else
-- 
2.32.0


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v1 2/2] Documentation, mm: Add Sv57 vm layout documentation
  2021-11-24 11:20 [RESEND PATCH v1 0/2] Add Sv57 page table support panqinglin2020
  2021-11-24 11:20 ` [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation panqinglin2020
@ 2021-11-24 11:20 ` panqinglin2020
  2021-11-24 23:29 ` [RESEND PATCH v1 0/2] Add Sv57 page table support Heiko Stübner
  2 siblings, 0 replies; 9+ messages in thread
From: panqinglin2020 @ 2021-11-24 11:20 UTC (permalink / raw)
  To: paul.walmsley, palmer, aou, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan

From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

This patch adds a description of the Sv57 virtual memory layout to
vm-layout.rst. When using Sv57, the kernel will use 0xff80000000000000
as PAGE_OFFSET.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
---
 Documentation/riscv/vm-layout.rst | 36 +++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst
index f10128e0a95f..5c115355a8f9 100644
--- a/Documentation/riscv/vm-layout.rst
+++ b/Documentation/riscv/vm-layout.rst
@@ -97,3 +97,39 @@ RISC-V Linux Kernel SV48
    ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules, BPF
    ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel
   __________________|____________|__________________|_________|____________________________________________________________
+
+
+RISC-V Linux Kernel SV57
+------------------------
+
+::
+
+  ========================================================================================================================
+        Start addr    |   Offset   |     End addr     |  Size   | VM area description
+  ========================================================================================================================
+                      |            |                  |         |
+     0000000000000000 |    0       | 00ffffffffffffff |  64 PB  | user-space virtual memory, different per mm
+    __________________|____________|__________________|_________|___________________________________________________________
+                      |            |                  |         |
+     0100000000000000 |  +64    PB | feffffffffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
+                      |            |                  |         | virtual memory addresses up to the -64 PB
+                      |            |                  |         | starting offset of kernel mappings.
+    __________________|____________|__________________|_________|___________________________________________________________
+                                                                |
+                                                                | Kernel-space virtual memory, shared between all processes:
+    ____________________________________________________________|___________________________________________________________
+                      |            |                  |         |
+     ff00000000000000 |  -64    PB | ff1fffffffffffff |    8 PB | kasan
+     ff3bfffffee00000 |  -49    PB | ff3bfffffeffffff |    2 MB | fixmap
+     ff3bffffff000000 |  -49    PB | ff3bffffffffffff |   16 MB | PCI io
+     ff3c000000000000 |  -49    PB | ff3fffffffffffff |    1 PB | vmemmap
+     ff40000000000000 |  -48    PB | ff7fffffffffffff |   16 PB | vmalloc/ioremap space
+     ff80000000000000 |  -32    PB | fffffffeffffffff |   32 PB | direct mapping of all physical memory
+    __________________|____________|__________________|_________|____________________________________________________________
+                                                                |
+                                                                | Identical layout to the 39-bit one from here on:
+    ____________________________________________________________|____________________________________________________________
+                      |            |                  |         |
+     ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules, BPF
+     ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel
+    __________________|____________|__________________|_________|____________________________________________________________
-- 
2.32.0


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [RESEND PATCH v1 0/2] Add Sv57 page table support
  2021-11-24 11:20 [RESEND PATCH v1 0/2] Add Sv57 page table support panqinglin2020
  2021-11-24 11:20 ` [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation panqinglin2020
  2021-11-24 11:20 ` [PATCH v1 2/2] Documentation, mm: Add Sv57 vm layout documentation panqinglin2020
@ 2021-11-24 23:29 ` Heiko Stübner
  2 siblings, 0 replies; 9+ messages in thread
From: Heiko Stübner @ 2021-11-24 23:29 UTC (permalink / raw)
  To: paul.walmsley, palmer, aou, linux-riscv
  Cc: jeff, xuyinan, Qinglin Pan, panqinglin2020

Am Mittwoch, 24. November 2021, 12:20:19 CET schrieb panqinglin2020@iscas.ac.cn:
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> This patchset is based on Alex's Sv48 patchset v2.
> 
> This implements Sv57 support at runtime. The kernel will try to boot with
> 5-level page table firstly when the mmu-type field in dtb is "mmu,sv57" or null,
> and will fallback to 4-level if the HW does not support it.
> 
> Tested on:
>   - qemu rv64 sv39
>   - qemu rv64 sv48
>   - qemu rv64 sv57
>   - Sifive unmatched


On a beagleV (which supports only sv39) I've tested both the limit via
the mmu-type in the devicetree and also that the fallback works when
I disable the mmu-type in the dt, so

Tested-by: Heiko Stuebner <heiko@sntech.de>



> Qinglin Pan (2):
>   riscv,mm: Add Sv57 support based on Sv48 implementation
>   Documentation, mm: Add Sv57 vm layout documentation
> 
>  Documentation/riscv/vm-layout.rst   |  36 ++++++
>  arch/riscv/Kconfig                  |   4 +-
>  arch/riscv/include/asm/csr.h        |   1 +
>  arch/riscv/include/asm/fixmap.h     |   1 +
>  arch/riscv/include/asm/page.h       |   1 +
>  arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
>  arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
>  arch/riscv/include/asm/pgtable.h    |   4 +-
>  arch/riscv/kernel/cpu.c             |   4 +-
>  arch/riscv/mm/init.c                | 169 +++++++++++++++++++++++++---
>  10 files changed, 348 insertions(+), 24 deletions(-)
> 
> 





_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation
  2021-11-24 11:20 ` [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation panqinglin2020
@ 2021-11-29 11:20   ` Alexandre ghiti
  2021-11-29 12:18     ` Heiko Stübner
  2021-11-30  3:11     ` 潘庆霖
  0 siblings, 2 replies; 9+ messages in thread
From: Alexandre ghiti @ 2021-11-29 11:20 UTC (permalink / raw)
  To: panqinglin2020, paul.walmsley, palmer, aou, linux-riscv; +Cc: jeff, xuyinan

Hi Qinglin,

On 11/24/21 12:20, panqinglin2020@iscas.ac.cn wrote:
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
>
> This patch adds Sv57 implementation on the top of Alex's Sv48 patchset.
> The mmu configuration will be determined on runtime, according to both
> mmu HW support and mmu-type field in the dtb. The kernel will try to
> set satp mode one by one from the configuration item to Sv39 in 64bit.
>
> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> ---
>   arch/riscv/Kconfig                  |   4 +-
>   arch/riscv/include/asm/csr.h        |   1 +
>   arch/riscv/include/asm/fixmap.h     |   1 +
>   arch/riscv/include/asm/page.h       |   1 +
>   arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
>   arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
>   arch/riscv/include/asm/pgtable.h    |   4 +-
>   arch/riscv/kernel/cpu.c             |   4 +-
>   arch/riscv/mm/init.c                | 169 +++++++++++++++++++++++++---
>   9 files changed, 312 insertions(+), 24 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index a4cadcd4e785..aac28e96d0e5 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -151,7 +151,7 @@ config PAGE_OFFSET
>   	hex
>   	default 0xC0000000 if 32BIT
>   	default 0x80000000 if 64BIT && !MMU
> -	default 0xffffc00000000000 if 64BIT
> +	default 0xff80000000000000 if 64BIT
>   
>   config ARCH_FLATMEM_ENABLE
>   	def_bool !NUMA
> @@ -196,7 +196,7 @@ config FIX_EARLYCON_MEM
>   
>   config PGTABLE_LEVELS
>   	int
> -	default 4 if 64BIT
> +	default 5 if 64BIT
>   	default 2
>   
>   config LOCKDEP_SUPPORT
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index ae711692eec9..299abdef0cd6 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -47,6 +47,7 @@
>   #define SATP_PPN	_AC(0x00000FFFFFFFFFFF, UL)
>   #define SATP_MODE_39	_AC(0x8000000000000000, UL)
>   #define SATP_MODE_48	_AC(0x9000000000000000, UL)
> +#define SATP_MODE_57	_AC(0xa000000000000000, UL)
>   #define SATP_ASID_BITS	16
>   #define SATP_ASID_SHIFT	44
>   #define SATP_ASID_MASK	_AC(0xFFFF, UL)
> diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> index 58a718573ad6..3cfece8b6568 100644
> --- a/arch/riscv/include/asm/fixmap.h
> +++ b/arch/riscv/include/asm/fixmap.h
> @@ -25,6 +25,7 @@ enum fixed_addresses {
>   	FIX_PTE,
>   	FIX_PMD,
>   	FIX_PUD,
> +	FIX_P4D,
>   	FIX_TEXT_POKE1,
>   	FIX_TEXT_POKE0,
>   	FIX_EARLYCON_MEM_BASE,
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 63334568a10e..41e0d88234d5 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -37,6 +37,7 @@
>    * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
>    * define the PAGE_OFFSET value for SV39.
>    */
> +#define PAGE_OFFSET_L4		_AC(0xffffc00000000000, UL)
>   #define PAGE_OFFSET_L3		_AC(0xffffffe000000000, UL)
>   #else
>   #define PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index 11823004b87a..947f23d7b6af 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
>   	}
>   }
>   
> +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
> +{
> +	if (pgtable_l5_enabled) {
> +		unsigned long pfn = virt_to_pfn(p4d);
> +
> +		set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> +	}
> +}
> +
> +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
> +				     p4d_t *p4d)
> +{
> +	if (pgtable_l5_enabled) {
> +		unsigned long pfn = virt_to_pfn(p4d);
> +
> +		set_pgd_safe(pgd,
> +			     __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> +	}
> +}
> +
>   #define pud_alloc_one pud_alloc_one
>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
>   {
> @@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
>   }
>   
>   #define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
> +
> +#define p4d_alloc_one p4d_alloc_one
> +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
> +{
> +	if (pgtable_l5_enabled) {
> +		gfp_t gfp = GFP_PGTABLE_USER;
> +
> +		if (mm == &init_mm)
> +			gfp = GFP_PGTABLE_KERNEL;
> +		return (p4d_t *)get_zeroed_page(gfp);
> +	}
> +
> +	return NULL;
> +}
> +
> +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
> +{
> +	BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
> +	free_page((unsigned long)p4d);
> +}
> +
> +#define p4d_free p4d_free
> +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
> +{
> +	if (pgtable_l5_enabled)
> +		__p4d_free(mm, p4d);
> +}
> +
> +#define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
>   static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
> index bbbdd66e5e2f..a01386d4094f 100644
> --- a/arch/riscv/include/asm/pgtable-64.h
> +++ b/arch/riscv/include/asm/pgtable-64.h
> @@ -9,16 +9,24 @@
>   #include <linux/const.h>
>   
>   extern bool pgtable_l4_enabled;
> +extern bool pgtable_l5_enabled;
>   
>   #define PGDIR_SHIFT_L3  30
>   #define PGDIR_SHIFT_L4  39
> +#define PGDIR_SHIFT_L5  48
>   #define PGDIR_SIZE_L3   (_AC(1, UL) << PGDIR_SHIFT_L3)
>   
> -#define PGDIR_SHIFT     (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
> +#define PGDIR_SHIFT     (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
> +		(pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))


This syntax is very cumbersome, the best I could come up with is the 
following macro:

#define pgtable_level(l3, l4, l5) (pgtable_l5_enabled ? (l5) : \
(pgtable_l4_enabled ? (l4) : (l3)))

And I'm wondering if a single variable that contains the number of page 
table levels would not be better actually, any idea?


>   /* Size of region mapped by a page global directory */
>   #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
>   #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
>   
> +/* p4d is folded into pgd in case of 4-level page table */
> +#define P4D_SHIFT      39
> +#define P4D_SIZE       (_AC(1, UL) << P4D_SHIFT)
> +#define P4D_MASK       (~(P4D_SIZE - 1))
> +
>   /* pud is folded into pgd in case of 3-level page table */
>   #define PUD_SHIFT      30
>   #define PUD_SIZE       (_AC(1, UL) << PUD_SHIFT)
> @@ -29,6 +37,15 @@ extern bool pgtable_l4_enabled;
>   #define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT)
>   #define PMD_MASK        (~(PMD_SIZE - 1))
>   
> +/* Page 4th Directory entry */
> +typedef struct {
> +	unsigned long p4d;
> +} p4d_t;
> +
> +#define p4d_val(x)	((x).p4d)
> +#define __p4d(x)	((p4d_t) { (x) })
> +#define PTRS_PER_P4D	(PAGE_SIZE / sizeof(p4d_t))
> +
>   /* Page Upper Directory entry */
>   typedef struct {
>   	unsigned long pud;
> @@ -99,6 +116,15 @@ static inline struct page *pud_page(pud_t pud)
>   	return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
>   }
>   
> +#define mm_p4d_folded  mm_p4d_folded
> +static inline bool mm_p4d_folded(struct mm_struct *mm)
> +{
> +	if (pgtable_l5_enabled)
> +		return false;
> +
> +	return true;
> +}
> +
>   #define mm_pud_folded  mm_pud_folded
>   static inline bool mm_pud_folded(struct mm_struct *mm)
>   {
> @@ -128,6 +154,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
>   #define pud_ERROR(e)   \
>   	pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
>   
> +#define p4d_ERROR(e)   \
> +	pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
> +
>   static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
>   {
>   	if (pgtable_l4_enabled)
> @@ -166,6 +195,16 @@ static inline void p4d_clear(p4d_t *p4d)
>   		set_p4d(p4d, __p4d(0));
>   }
>   
> +static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
> +{
> +	return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
> +}
> +
> +static inline unsigned long _p4d_pfn(p4d_t p4d)
> +{
> +	return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
> +}
> +
>   static inline pud_t *p4d_pgtable(p4d_t p4d)
>   {
>   	if (pgtable_l4_enabled)
> @@ -190,4 +229,66 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
>   	return (pud_t *)p4d;
>   }
>   
> +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		*pgdp = pgd;
> +	else
> +		set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
> +}
> +
> +static inline int pgd_none(pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		return (pgd_val(pgd) == 0);
> +
> +	return 0;
> +}
> +
> +static inline int pgd_present(pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		return (pgd_val(pgd) & _PAGE_PRESENT);
> +
> +	return 1;
> +}
> +
> +static inline int pgd_bad(pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		return !pgd_present(pgd);
> +
> +	return 0;
> +}
> +
> +static inline void pgd_clear(pgd_t *pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		set_pgd(pgd, __pgd(0));
> +}
> +
> +static inline p4d_t *pgd_pgtable(pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
> +
> +	return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
> +}
> +
> +static inline struct page *pgd_page(pgd_t pgd)
> +{
> +	return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
> +}
> +
> +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
> +
> +#define p4d_offset p4d_offset
> +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
> +{
> +	if (pgtable_l5_enabled)
> +		return pgd_pgtable(*pgd) + p4d_index(address);
> +
> +	return (p4d_t *)pgd;
> +}
> +
>   #endif /* _ASM_RISCV_PGTABLE_64_H */
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 152fc5454c02..922cc436b36b 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -49,7 +49,8 @@
>    * position vmemmap directly below the VMALLOC region.
>    */
>   #ifdef CONFIG_64BIT
> -#define VA_BITS		(pgtable_l4_enabled ? 48 : 39)
> +#define VA_BITS		(pgtable_l5_enabled ? \
> +				57 : (pgtable_l4_enabled ? 48 : 39))
>   #else
>   #define VA_BITS		32
>   #endif
> @@ -89,7 +90,6 @@
>   
>   #ifndef __ASSEMBLY__
>   
> -#include <asm-generic/pgtable-nop4d.h>
>   #include <asm/page.h>
>   #include <asm/tlbflush.h>
>   #include <linux/mm_types.h>
> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> index dea9b1c31889..e190bd205376 100644
> --- a/arch/riscv/kernel/cpu.c
> +++ b/arch/riscv/kernel/cpu.c
> @@ -78,7 +78,9 @@ static void print_mmu(struct seq_file *f)
>   #if defined(CONFIG_32BIT)
>   	strncpy(sv_type, "sv32", 5);
>   #elif defined(CONFIG_64BIT)
> -	if (pgtable_l4_enabled)
> +	if (pgtable_l5_enabled)
> +		strncpy(sv_type, "sv57", 5);
> +	else if (pgtable_l4_enabled)
>   		strncpy(sv_type, "sv48", 5);
>   	else
>   		strncpy(sv_type, "sv39", 5);
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 0945ac9ed682..81822db8dd10 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -38,12 +38,15 @@ EXPORT_SYMBOL(kernel_map);
>   #endif
>   
>   #ifdef CONFIG_64BIT
> -u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
> +u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
>   #else
>   u64 satp_mode = SATP_MODE_32;
>   #endif
>   EXPORT_SYMBOL(satp_mode);
>   
> +bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL) ?
> +				true : false;
> +EXPORT_SYMBOL(pgtable_l5_enabled);
>   bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL) ?
>   				true : false;
>   EXPORT_SYMBOL(pgtable_l4_enabled);
> @@ -72,6 +75,8 @@ struct pt_alloc_ops {
>   	phys_addr_t (*alloc_pmd)(uintptr_t va);
>   	pud_t *(*get_pud_virt)(phys_addr_t pa);
>   	phys_addr_t (*alloc_pud)(uintptr_t va);
> +	p4d_t *(*get_p4d_virt)(phys_addr_t pa);
> +	phys_addr_t (*alloc_p4d)(uintptr_t va);
>   #endif
>   };
>   
> @@ -285,6 +290,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
>   static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
>   
>   pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
> +static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
>   static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
>   static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
>   
> @@ -374,6 +380,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
>   #define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
>   #endif /* CONFIG_XIP_KERNEL */
>   
> +static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
> +static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
> +static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
> +
> +#ifdef CONFIG_XIP_KERNEL
> +#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
> +#define fixmap_p4d     ((p4d_t *)XIP_FIXUP(fixmap_p4d))
> +#define early_p4d      ((p4d_t *)XIP_FIXUP(early_p4d))
> +#endif /* CONFIG_XIP_KERNEL */
> +
>   static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
>   static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
>   static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
> @@ -487,6 +503,44 @@ static phys_addr_t alloc_pud_late(uintptr_t va)
>   	return __pa(vaddr);
>   }
>   
> +static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
> +{
> +	return (p4d_t *)((uintptr_t)pa);
> +}
> +
> +static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
> +{
> +	clear_fixmap(FIX_P4D);
> +	return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
> +}
> +
> +static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
> +{
> +	return (p4d_t *)__va(pa);
> +}
> +
> +static phys_addr_t __init alloc_p4d_early(uintptr_t va)
> +{
> +	/* Only one P4D is available for early mapping */
> +	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
> +
> +	return (uintptr_t)early_p4d;
> +}
> +
> +static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
> +{
> +	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> +}
> +
> +static phys_addr_t alloc_p4d_late(uintptr_t va)
> +{
> +	unsigned long vaddr;
> +
> +	vaddr = __get_free_page(GFP_KERNEL);
> +	BUG_ON(!vaddr);
> +	return __pa(vaddr);
> +}
> +
>   static void __init create_pud_mapping(pud_t *pudp,
>   				      uintptr_t va, phys_addr_t pa,
>   				      phys_addr_t sz, pgprot_t prot)
> @@ -514,21 +568,55 @@ static void __init create_pud_mapping(pud_t *pudp,
>   	create_pmd_mapping(nextp, va, pa, sz, prot);
>   }
>   
> -#define pgd_next_t		pud_t
> -#define alloc_pgd_next(__va)	(pgtable_l4_enabled ?			\
> -		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
> -#define get_pgd_next_virt(__pa)	(pgtable_l4_enabled ?			\
> -		pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
> +static void __init create_p4d_mapping(p4d_t *p4dp,
> +				      uintptr_t va, phys_addr_t pa,
> +				      phys_addr_t sz, pgprot_t prot)
> +{
> +	pud_t *nextp;
> +	phys_addr_t next_phys;
> +	uintptr_t p4d_index = p4d_index(va);
> +
> +	if (sz == P4D_SIZE) {
> +		if (p4d_val(p4dp[p4d_index]) == 0)
> +			p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
> +		return;
> +	}
> +
> +	if (p4d_val(p4dp[p4d_index]) == 0) {
> +		next_phys = pt_ops.alloc_pud(va);
> +		p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
> +		nextp = pt_ops.get_pud_virt(next_phys);
> +		memset(nextp, 0, PAGE_SIZE);
> +	} else {
> +		next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
> +		nextp = pt_ops.get_pud_virt(next_phys);
> +	}
> +
> +	create_pud_mapping(nextp, va, pa, sz, prot);
> +}
> +
> +#define pgd_next_t		p4d_t
> +#define alloc_pgd_next(__va)	(pgtable_l5_enabled ?			\
> +		pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ?		\
> +		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
> +#define get_pgd_next_virt(__pa)	(pgtable_l5_enabled ?			\
> +		pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ?	\
> +		pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
>   #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
> +				(pgtable_l5_enabled ?			\
> +		create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
>   				(pgtable_l4_enabled ?			\
> -		create_pud_mapping(__nextp, __va, __pa, __sz, __prot) :	\
> -		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
> -#define fixmap_pgd_next		(pgtable_l4_enabled ?			\
> -		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
> -#define trampoline_pgd_next	(pgtable_l4_enabled ?			\
> -		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
> -#define early_dtb_pgd_next	(pgtable_l4_enabled ?			\
> -		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
> +		create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) :	\
> +		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
> +#define fixmap_pgd_next		(pgtable_l5_enabled ?			\
> +		(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ?		\
> +		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
> +#define trampoline_pgd_next	(pgtable_l5_enabled ?			\
> +		(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ?	\
> +		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
> +#define early_dtb_pgd_next	(pgtable_l5_enabled ?			\
> +		(uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ?	\
> +		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
>   #else
>   #define pgd_next_t		pte_t
>   #define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
> @@ -537,6 +625,7 @@ static void __init create_pud_mapping(pud_t *pudp,
>   	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
>   #define fixmap_pgd_next		((uintptr_t)fixmap_pte)
>   #define early_dtb_pgd_next	((uintptr_t)early_dtb_pmd)
> +#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot)
>   #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
>   #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
>   #endif /* __PAGETABLE_PMD_FOLDED */
> @@ -627,6 +716,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
>   #endif /* CONFIG_STRICT_KERNEL_RWX */
>   
>   #ifdef CONFIG_64BIT
> +static void __init disable_pgtable_l5(void)
> +{
> +	pgtable_l5_enabled = false;
> +	kernel_map.page_offset = PAGE_OFFSET_L4;
> +	satp_mode = SATP_MODE_48;
> +}
> +
>   static void __init disable_pgtable_l4(void)
>   {
>   	pgtable_l4_enabled = false;
> @@ -643,8 +739,9 @@ static void __init disable_pgtable_l4(void)
>   static __init void set_satp_mode(uintptr_t dtb_pa)
>   {
>   	u64 identity_satp, hw_satp;
> -	uintptr_t set_satp_mode_pmd;
> +	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
>   	int cpus_node;
> +	bool check_l4 = false;
>   
>   	/* Check if the user asked for sv39 explicitly in the device tree */
>   	cpus_node = fdt_path_offset((void *)dtb_pa, "/cpus");
> @@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>   				continue;
>   
>   			if (!strcmp(mmu_type, "riscv,sv39")) {
> +				disable_pgtable_l5();
>   				disable_pgtable_l4();
>   				return;
>   			}
>   
> +			if (!strcmp(mmu_type, "riscv,sv48")) {
> +				check_l4 = true;
> +			}
> +


If sv48 is set in the device tree, why would you test if it is supported 
below? I would take it as is, just like for sv39, I'm not sure we want 
to override this silently and make a wrong device tree work.


>   			break;
>   		}
>   	}
>   
> -	set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
> +retry:
> +	if (check_l4)
> +		disable_pgtable_l5();
> +
>   	create_pgd_mapping(early_pg_dir,
> -			   set_satp_mode_pmd, (uintptr_t)early_pud,
> +			   set_satp_mode_pmd,
> +			   check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
>   			   PGDIR_SIZE, PAGE_TABLE);
> +	if (!check_l4)
> +		create_p4d_mapping(early_p4d,
> +				set_satp_mode_pmd, (uintptr_t)early_pud,
> +				P4D_SIZE, PAGE_TABLE);
>   	create_pud_mapping(early_pud,
>   			   set_satp_mode_pmd, (uintptr_t)early_pmd,
>   			   PUD_SIZE, PAGE_TABLE);
> @@ -689,10 +799,16 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>   	hw_satp = csr_swap(CSR_SATP, 0ULL);
>   	local_flush_tlb_all();
>   
> -	if (hw_satp != identity_satp)
> +	if (hw_satp != identity_satp) {
> +		if (!check_l4) {
> +			check_l4 = true;
> +			goto retry;
> +		}
>   		disable_pgtable_l4();
> +	}
>   
>   	memset(early_pg_dir, 0, PAGE_SIZE);
> +	memset(early_p4d, 0, PAGE_SIZE);
>   	memset(early_pud, 0, PAGE_SIZE);
>   	memset(early_pmd, 0, PAGE_SIZE);
>   }
> @@ -766,6 +882,10 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
>   			   PGDIR_SIZE,
>   			   IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
>   
> +	if (pgtable_l5_enabled)
> +		create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
> +				   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
> +
>   	if (pgtable_l4_enabled)
>   		create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
>   				   (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
> @@ -802,6 +922,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>   	pt_ops.get_pmd_virt = get_pmd_virt_early;
>   	pt_ops.alloc_pud = alloc_pud_early;
>   	pt_ops.get_pud_virt = get_pud_virt_early;
> +	pt_ops.alloc_p4d = alloc_p4d_early;
> +	pt_ops.get_p4d_virt = get_p4d_virt_early;
>   #endif
>   
>   	kernel_map.virt_addr = KERNEL_LINK_ADDR;
> @@ -855,6 +977,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>   			   fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
>   
>   #ifndef __PAGETABLE_PMD_FOLDED
> +	/* Setup fixmap P4D and PUD */
> +	if (pgtable_l5_enabled)
> +		create_p4d_mapping(fixmap_p4d, FIXADDR_START,
> +				   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
>   	/* Setup fixmap PUD and PMD */
>   	if (pgtable_l4_enabled)
>   		create_pud_mapping(fixmap_pud, FIXADDR_START,
> @@ -864,6 +990,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>   	/* Setup trampoline PGD and PMD */
>   	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
>   			   trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> +	if (pgtable_l5_enabled)
> +		create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
> +				   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
>   	if (pgtable_l4_enabled)
>   		create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
>   				   (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
> @@ -938,6 +1067,8 @@ static void __init setup_vm_final(void)
>   	pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
>   	pt_ops.alloc_pud = alloc_pud_fixmap;
>   	pt_ops.get_pud_virt = get_pud_virt_fixmap;
> +	pt_ops.alloc_p4d = alloc_p4d_fixmap;
> +	pt_ops.get_p4d_virt = get_p4d_virt_fixmap;
>   #endif
>   	/* Setup swapper PGD for fixmap */
>   	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
> @@ -985,6 +1116,8 @@ static void __init setup_vm_final(void)
>   	pt_ops.get_pmd_virt = get_pmd_virt_late;
>   	pt_ops.alloc_pud = alloc_pud_late;
>   	pt_ops.get_pud_virt = get_pud_virt_late;
> +	pt_ops.alloc_p4d = alloc_p4d_late;
> +	pt_ops.get_p4d_virt = get_p4d_virt_late;
>   #endif
>   }
>   #else


You forgot to handle kasan in this patch. Actually, I'm updating kasan 
for the sv48 patchset after commit 54c5639d8f50 ("riscv: Fix asan-stack 
clang build") broke it. I'm struggling a bit as the kasan offset that is 
known at compile time must be the same for sv39, sv48 and sv57, so we 
have to move the kasan region next to the kernel, but then it is not 
aligned on pgdir boundaries for sv48 and sv57, so the current kasan 
population functions must be adapted.

Anyway, I would advise you to wait for my updated patchset before 
tackling kasan for sv57.

Thanks,

Alex


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation
  2021-11-29 11:20   ` Alexandre ghiti
@ 2021-11-29 12:18     ` Heiko Stübner
  2021-11-30  3:11     ` 潘庆霖
  1 sibling, 0 replies; 9+ messages in thread
From: Heiko Stübner @ 2021-11-29 12:18 UTC (permalink / raw)
  To: panqinglin2020, paul.walmsley, palmer, aou, linux-riscv
  Cc: jeff, xuyinan, Alexandre ghiti

Am Montag, 29. November 2021, 12:20:20 CET schrieb Alexandre ghiti:
> Hi Qinglin,
> 
> On 11/24/21 12:20, panqinglin2020@iscas.ac.cn wrote:
> > From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> >
> > This patch adds Sv57 implementation on the top of Alex's Sv48 patchset.
> > The mmu configuration will be determined on runtime, according to both
> > mmu HW support and mmu-type field in the dtb. The kernel will try to
> > set satp mode one by one from the configuration item to Sv39 in 64bit.
> >
> > Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>

> > @@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
> >   				continue;
> >   
> >   			if (!strcmp(mmu_type, "riscv,sv39")) {
> > +				disable_pgtable_l5();
> >   				disable_pgtable_l4();
> >   				return;
> >   			}
> >   
> > +			if (!strcmp(mmu_type, "riscv,sv48")) {
> > +				check_l4 = true;
> > +			}
> > +
> 
> 
> If sv48 is set in the device tree, why would you test if it is supported 
> below? I would take it as is, just like for sv39, I'm not sure we want 
> to override this silently and make a wrong device tree work.

or alternatively the code could emit a big WARN that a mismatch was
corrected. Getting to a point where a developer can actually see their
mistake on a console might be helpful to people :-)



_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation
  2021-11-29 11:20   ` Alexandre ghiti
  2021-11-29 12:18     ` Heiko Stübner
@ 2021-11-30  3:11     ` 潘庆霖
  2021-12-06 10:53       ` Alexandre ghiti
  1 sibling, 1 reply; 9+ messages in thread
From: 潘庆霖 @ 2021-11-30  3:11 UTC (permalink / raw)
  To: Alexandre ghiti, paul.walmsley, palmer, aou, linux-riscv; +Cc: jeff, xuyinan


Hi Alex,


On 2021/11/29 19:20, Alexandre ghiti wrote:
 > Hi Qinglin,
 >
 > On 11/24/21 12:20, panqinglin2020@iscas.ac.cn wrote:
 >> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
 >>
 >> This patch adds Sv57 implementation on the top of Alex's Sv48 patchset.
 >> The mmu configuration will be determined on runtime, according to both
 >> mmu HW support and mmu-type field in the dtb. The kernel will try to
 >> set satp mode one by one from the configuration item to Sv39 in 64bit.
 >>
 >> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
 >> ---
 >>   arch/riscv/Kconfig                  |   4 +-
 >>   arch/riscv/include/asm/csr.h        |   1 +
 >>   arch/riscv/include/asm/fixmap.h     |   1 +
 >>   arch/riscv/include/asm/page.h       |   1 +
 >>   arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
 >>   arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
 >>   arch/riscv/include/asm/pgtable.h    |   4 +-
 >>   arch/riscv/kernel/cpu.c             |   4 +-
 >>   arch/riscv/mm/init.c                | 169 +++++++++++++++++++++++++---
 >>   9 files changed, 312 insertions(+), 24 deletions(-)
 >>
 >> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
 >> index a4cadcd4e785..aac28e96d0e5 100644
 >> --- a/arch/riscv/Kconfig
 >> +++ b/arch/riscv/Kconfig
 >> @@ -151,7 +151,7 @@ config PAGE_OFFSET
 >>       hex
 >>       default 0xC0000000 if 32BIT
 >>       default 0x80000000 if 64BIT && !MMU
 >> -    default 0xffffc00000000000 if 64BIT
 >> +    default 0xff80000000000000 if 64BIT
 >>     config ARCH_FLATMEM_ENABLE
 >>       def_bool !NUMA
 >> @@ -196,7 +196,7 @@ config FIX_EARLYCON_MEM
 >>     config PGTABLE_LEVELS
 >>       int
 >> -    default 4 if 64BIT
 >> +    default 5 if 64BIT
 >>       default 2
 >>     config LOCKDEP_SUPPORT
 >> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
 >> index ae711692eec9..299abdef0cd6 100644
 >> --- a/arch/riscv/include/asm/csr.h
 >> +++ b/arch/riscv/include/asm/csr.h
 >> @@ -47,6 +47,7 @@
 >>   #define SATP_PPN    _AC(0x00000FFFFFFFFFFF, UL)
 >>   #define SATP_MODE_39    _AC(0x8000000000000000, UL)
 >>   #define SATP_MODE_48    _AC(0x9000000000000000, UL)
 >> +#define SATP_MODE_57    _AC(0xa000000000000000, UL)
 >>   #define SATP_ASID_BITS    16
 >>   #define SATP_ASID_SHIFT    44
 >>   #define SATP_ASID_MASK    _AC(0xFFFF, UL)
 >> diff --git a/arch/riscv/include/asm/fixmap.h 
b/arch/riscv/include/asm/fixmap.h
 >> index 58a718573ad6..3cfece8b6568 100644
 >> --- a/arch/riscv/include/asm/fixmap.h
 >> +++ b/arch/riscv/include/asm/fixmap.h
 >> @@ -25,6 +25,7 @@ enum fixed_addresses {
 >>       FIX_PTE,
 >>       FIX_PMD,
 >>       FIX_PUD,
 >> +    FIX_P4D,
 >>       FIX_TEXT_POKE1,
 >>       FIX_TEXT_POKE0,
 >>       FIX_EARLYCON_MEM_BASE,
 >> diff --git a/arch/riscv/include/asm/page.h 
b/arch/riscv/include/asm/page.h
 >> index 63334568a10e..41e0d88234d5 100644
 >> --- a/arch/riscv/include/asm/page.h
 >> +++ b/arch/riscv/include/asm/page.h
 >> @@ -37,6 +37,7 @@
 >>    * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 
address space so
 >>    * define the PAGE_OFFSET value for SV39.
 >>    */
 >> +#define PAGE_OFFSET_L4        _AC(0xffffc00000000000, UL)
 >>   #define PAGE_OFFSET_L3        _AC(0xffffffe000000000, UL)
 >>   #else
 >>   #define PAGE_OFFSET        _AC(CONFIG_PAGE_OFFSET, UL)
 >> diff --git a/arch/riscv/include/asm/pgalloc.h 
b/arch/riscv/include/asm/pgalloc.h
 >> index 11823004b87a..947f23d7b6af 100644
 >> --- a/arch/riscv/include/asm/pgalloc.h
 >> +++ b/arch/riscv/include/asm/pgalloc.h
 >> @@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct 
mm_struct *mm, p4d_t *p4d,
 >>       }
 >>   }
 >>   +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, 
p4d_t *p4d)
 >> +{
 >> +    if (pgtable_l5_enabled) {
 >> +        unsigned long pfn = virt_to_pfn(p4d);
 >> +
 >> +        set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
 >> +    }
 >> +}
 >> +
 >> +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
 >> +                     p4d_t *p4d)
 >> +{
 >> +    if (pgtable_l5_enabled) {
 >> +        unsigned long pfn = virt_to_pfn(p4d);
 >> +
 >> +        set_pgd_safe(pgd,
 >> +                 __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
 >> +    }
 >> +}
 >> +
 >>   #define pud_alloc_one pud_alloc_one
 >>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned 
long addr)
 >>   {
 >> @@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, 
pud_t *pud)
 >>   }
 >>     #define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
 >> +
 >> +#define p4d_alloc_one p4d_alloc_one
 >> +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned 
long addr)
 >> +{
 >> +    if (pgtable_l5_enabled) {
 >> +        gfp_t gfp = GFP_PGTABLE_USER;
 >> +
 >> +        if (mm == &init_mm)
 >> +            gfp = GFP_PGTABLE_KERNEL;
 >> +        return (p4d_t *)get_zeroed_page(gfp);
 >> +    }
 >> +
 >> +    return NULL;
 >> +}
 >> +
 >> +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
 >> +{
 >> +    BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
 >> +    free_page((unsigned long)p4d);
 >> +}
 >> +
 >> +#define p4d_free p4d_free
 >> +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
 >> +{
 >> +    if (pgtable_l5_enabled)
 >> +        __p4d_free(mm, p4d);
 >> +}
 >> +
 >> +#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
 >>   #endif /* __PAGETABLE_PMD_FOLDED */
 >>     static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 >> diff --git a/arch/riscv/include/asm/pgtable-64.h 
b/arch/riscv/include/asm/pgtable-64.h
 >> index bbbdd66e5e2f..a01386d4094f 100644
 >> --- a/arch/riscv/include/asm/pgtable-64.h
 >> +++ b/arch/riscv/include/asm/pgtable-64.h
 >> @@ -9,16 +9,24 @@
 >>   #include <linux/const.h>
 >>     extern bool pgtable_l4_enabled;
 >> +extern bool pgtable_l5_enabled;
 >>     #define PGDIR_SHIFT_L3  30
 >>   #define PGDIR_SHIFT_L4  39
 >> +#define PGDIR_SHIFT_L5  48
 >>   #define PGDIR_SIZE_L3   (_AC(1, UL) << PGDIR_SHIFT_L3)
 >>   -#define PGDIR_SHIFT     (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : 
PGDIR_SHIFT_L3)
 >> +#define PGDIR_SHIFT     (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
 >> +        (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
 >
 >
 > This syntax is very cumbersome, the best I could come up with is the 
following macro:
 >
 > #define pgtable_level(l3, l4, l5) (pgtable_l5_enabled ? (l5) : \
 > (pgtable_l4_enabled ? (l4) : (l3)))
 >
 > And I'm wondering if a single variable that contains the number of 
page table levels would not be better actually, any idea?
 >

Yes, it is a good idea to use a macro like pgtable_level. But a variable 
containing
the number of page table levels may be better. What about using such 
variable
for array index like this:

static int64_t page_table_level = 5; // default to sv57
#define PGTABLE_LEVEL_OFFSET 3
static int64_t pgdir_shift[3] = {PGDIR_SHIFT_L3, PGDIR_SHIFT_L4, 
PGDIR_SHIFT_L5};
#define PGDIR_SHIFT pgdir_shift[page_table_level - PGTABLE_LEVEL_OFFSET]

 >
 >>
 >>   #endif /* __PAGETABLE_PMD_FOLDED */
 >> @@ -627,6 +716,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
 >>   #endif /* CONFIG_STRICT_KERNEL_RWX */
 >>     #ifdef CONFIG_64BIT
 >> +static void __init disable_pgtable_l5(void)
 >> +{
 >> +    pgtable_l5_enabled = false;
 >> +    kernel_map.page_offset = PAGE_OFFSET_L4;
 >> +    satp_mode = SATP_MODE_48;
 >> +}
 >> +
 >>   static void __init disable_pgtable_l4(void)
 >>   {
 >>       pgtable_l4_enabled = false;
 >> @@ -643,8 +739,9 @@ static void __init disable_pgtable_l4(void)
 >>   static __init void set_satp_mode(uintptr_t dtb_pa)
 >>   {
 >>       u64 identity_satp, hw_satp;
 >> -    uintptr_t set_satp_mode_pmd;
 >> +    uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & 
PMD_MASK;
 >>       int cpus_node;
 >> +    bool check_l4 = false;
 >>         /* Check if the user asked for sv39 explicitly in the device 
tree */
 >>       cpus_node = fdt_path_offset((void *)dtb_pa, "/cpus");
 >> @@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
 >>                   continue;
 >>                 if (!strcmp(mmu_type, "riscv,sv39")) {
 >> +                disable_pgtable_l5();
 >>                   disable_pgtable_l4();
 >>                   return;
 >>               }
 >>   +            if (!strcmp(mmu_type, "riscv,sv48")) {
 >> +                check_l4 = true;
 >> +            }
 >> +
 >
 >
 > If sv48 is set in the device tree, why would you test if it is 
supported below? I would take it as is, just like for sv39, I'm not sure 
we want to override this silently and make a wrong device tree work.
 >

Actually I prefer to emit a big WARN when sv48 is set in the device tree
but unavailable. I think the device tree field shows the page table level
which developers expect, and the kernel should try its best to boot itself
even when the cpu doesn't meet that expectation. As for sv39, I think any
RISC-V 64-bit device's mmu (if it exists) should support it, so we don't need
to test it.


 >
 >> break;
 >>           }
 >>       }
 >>   -    set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
 >> +retry:
 >> +    if (check_l4)
 >> +        disable_pgtable_l5();
 >> +
 >>       create_pgd_mapping(early_pg_dir,
 >> -               set_satp_mode_pmd, (uintptr_t)early_pud,
 >> +               set_satp_mode_pmd,
 >> +               check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
 >>                  PGDIR_SIZE, PAGE_TABLE);
 >> +    if (!check_l4)
 >> +        create_p4d_mapping(early_p4d,
 >> +                set_satp_mode_pmd, (uintptr_t)early_pud,
 >> +                P4D_SIZE, PAGE_TABLE);
 >>       create_pud_mapping(early_pud,
 >>                  set_satp_mode_pmd, (uintptr_t)early_pmd,
 >>                  PUD_SIZE, PAGE_TABLE);
 >> @@ -689,10 +799,16 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
 >>       hw_satp = csr_swap(CSR_SATP, 0ULL);
 >>       local_flush_tlb_all();
 >>   -    if (hw_satp != identity_satp)
 >> +    if (hw_satp != identity_satp) {
 >> +        if (!check_l4) {
 >> +            check_l4 = true;
 >> +            goto retry;
 >> +        }
 >>           disable_pgtable_l4();
 >> +    }
 >>         memset(early_pg_dir, 0, PAGE_SIZE);
 >> +    memset(early_p4d, 0, PAGE_SIZE);
 >>       memset(early_pud, 0, PAGE_SIZE);
 >>       memset(early_pmd, 0, PAGE_SIZE);
 >>   }
 >> @@ -766,6 +882,10 @@ static void __init 
create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
 >>                  PGDIR_SIZE,
 >>                  IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
 >>   +    if (pgtable_l5_enabled)
 >> +        create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
 >> +                   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
 >> +
 >>       if (pgtable_l4_enabled)
 >>           create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
 >>                      (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
 >> @@ -802,6 +922,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 >>       pt_ops.get_pmd_virt = get_pmd_virt_early;
 >>       pt_ops.alloc_pud = alloc_pud_early;
 >>       pt_ops.get_pud_virt = get_pud_virt_early;
 >> +    pt_ops.alloc_p4d = alloc_p4d_early;
 >> +    pt_ops.get_p4d_virt = get_p4d_virt_early;
 >>   #endif
 >>         kernel_map.virt_addr = KERNEL_LINK_ADDR;
 >> @@ -855,6 +977,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 >>                  fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
 >>     #ifndef __PAGETABLE_PMD_FOLDED
 >> +    /* Setup fixmap P4D and PUD */
 >> +    if (pgtable_l5_enabled)
 >> +        create_p4d_mapping(fixmap_p4d, FIXADDR_START,
 >> +                   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
 >>       /* Setup fixmap PUD and PMD */
 >>       if (pgtable_l4_enabled)
 >>           create_pud_mapping(fixmap_pud, FIXADDR_START,
 >> @@ -864,6 +990,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 >>       /* Setup trampoline PGD and PMD */
 >>       create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
 >>                  trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
 >> +    if (pgtable_l5_enabled)
 >> +        create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
 >> +                   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
 >>       if (pgtable_l4_enabled)
 >>           create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
 >>                      (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
 >> @@ -938,6 +1067,8 @@ static void __init setup_vm_final(void)
 >>       pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
 >>       pt_ops.alloc_pud = alloc_pud_fixmap;
 >>       pt_ops.get_pud_virt = get_pud_virt_fixmap;
 >> +    pt_ops.alloc_p4d = alloc_p4d_fixmap;
 >> +    pt_ops.get_p4d_virt = get_p4d_virt_fixmap;
 >>   #endif
 >>       /* Setup swapper PGD for fixmap */
 >>       create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
 >> @@ -985,6 +1116,8 @@ static void __init setup_vm_final(void)
 >>       pt_ops.get_pmd_virt = get_pmd_virt_late;
 >>       pt_ops.alloc_pud = alloc_pud_late;
 >>       pt_ops.get_pud_virt = get_pud_virt_late;
 >> +    pt_ops.alloc_p4d = alloc_p4d_late;
 >> +    pt_ops.get_p4d_virt = get_p4d_virt_late;
 >>   #endif
 >>   }
 >>   #else
 >
 >
 > You forgot to handle kasan in this patch. Actually, I'm updating 
kasan for the sv48 patchset after commit 54c5639d8f50 ("riscv: Fix 
asan-stack clang build") broke it. I'm struggling a bit as the kasan 
offset that is known at compile time must be the same for sv39, s48 and 
sv57, so we have to move the kasan region next to the kernel, but then 
it is not aligned on pgdir boundaries for sv48 and sv57, so the current 
kasan population functions must be adapted.
 >
 > Anyway, I would advise you to wait for my updated patchset before 
tackling kasan for sv57.


Thanks for your suggestions. I will handle kasan in next patchset which 
should
be based on your new patchset.

Thanks,
Qinglin


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation
  2021-11-30  3:11     ` 潘庆霖
@ 2021-12-06 10:53       ` Alexandre ghiti
  0 siblings, 0 replies; 9+ messages in thread
From: Alexandre ghiti @ 2021-12-06 10:53 UTC (permalink / raw)
  To: 潘庆霖, paul.walmsley, palmer, aou, linux-riscv
  Cc: jeff, xuyinan

Hi Qinglin,

On 11/30/21 04:11, 潘庆霖 wrote:
>
> Hi Alex,
>
>
> On 2021/11/29 19:20, Alexandre ghiti wrote:
> > Hi Qinglin,
> >
> > On 11/24/21 12:20, panqinglin2020@iscas.ac.cn wrote:
> >> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> >>
> >> This patch adds Sv57 implementation on the top of Alex's Sv48 
> patchset.
> >> The mmu configuration will be determined on runtime, according to both
> >> mmu HW support and mmu-type field in the dtb. The kernel will try to
> >> set satp mode one by one from the configuration item to Sv39 in 64bit.
> >>
> >> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> >> ---
> >>   arch/riscv/Kconfig                  |   4 +-
> >>   arch/riscv/include/asm/csr.h        |   1 +
> >>   arch/riscv/include/asm/fixmap.h     |   1 +
> >>   arch/riscv/include/asm/page.h       |   1 +
> >>   arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
> >>   arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
> >>   arch/riscv/include/asm/pgtable.h    |   4 +-
> >>   arch/riscv/kernel/cpu.c             |   4 +-
> >>   arch/riscv/mm/init.c                | 169 
> +++++++++++++++++++++++++---
> >>   9 files changed, 312 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >> index a4cadcd4e785..aac28e96d0e5 100644
> >> --- a/arch/riscv/Kconfig
> >> +++ b/arch/riscv/Kconfig
> >> @@ -151,7 +151,7 @@ config PAGE_OFFSET
> >>       hex
> >>       default 0xC0000000 if 32BIT
> >>       default 0x80000000 if 64BIT && !MMU
> >> -    default 0xffffc00000000000 if 64BIT
> >> +    default 0xff80000000000000 if 64BIT
> >>     config ARCH_FLATMEM_ENABLE
> >>       def_bool !NUMA
> >> @@ -196,7 +196,7 @@ config FIX_EARLYCON_MEM
> >>     config PGTABLE_LEVELS
> >>       int
> >> -    default 4 if 64BIT
> >> +    default 5 if 64BIT
> >>       default 2
> >>     config LOCKDEP_SUPPORT
> >> diff --git a/arch/riscv/include/asm/csr.h 
> b/arch/riscv/include/asm/csr.h
> >> index ae711692eec9..299abdef0cd6 100644
> >> --- a/arch/riscv/include/asm/csr.h
> >> +++ b/arch/riscv/include/asm/csr.h
> >> @@ -47,6 +47,7 @@
> >>   #define SATP_PPN    _AC(0x00000FFFFFFFFFFF, UL)
> >>   #define SATP_MODE_39    _AC(0x8000000000000000, UL)
> >>   #define SATP_MODE_48    _AC(0x9000000000000000, UL)
> >> +#define SATP_MODE_57    _AC(0xa000000000000000, UL)
> >>   #define SATP_ASID_BITS    16
> >>   #define SATP_ASID_SHIFT    44
> >>   #define SATP_ASID_MASK    _AC(0xFFFF, UL)
> >> diff --git a/arch/riscv/include/asm/fixmap.h 
> b/arch/riscv/include/asm/fixmap.h
> >> index 58a718573ad6..3cfece8b6568 100644
> >> --- a/arch/riscv/include/asm/fixmap.h
> >> +++ b/arch/riscv/include/asm/fixmap.h
> >> @@ -25,6 +25,7 @@ enum fixed_addresses {
> >>       FIX_PTE,
> >>       FIX_PMD,
> >>       FIX_PUD,
> >> +    FIX_P4D,
> >>       FIX_TEXT_POKE1,
> >>       FIX_TEXT_POKE0,
> >>       FIX_EARLYCON_MEM_BASE,
> >> diff --git a/arch/riscv/include/asm/page.h 
> b/arch/riscv/include/asm/page.h
> >> index 63334568a10e..41e0d88234d5 100644
> >> --- a/arch/riscv/include/asm/page.h
> >> +++ b/arch/riscv/include/asm/page.h
> >> @@ -37,6 +37,7 @@
> >>    * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 
> address space so
> >>    * define the PAGE_OFFSET value for SV39.
> >>    */
> >> +#define PAGE_OFFSET_L4        _AC(0xffffc00000000000, UL)
> >>   #define PAGE_OFFSET_L3        _AC(0xffffffe000000000, UL)
> >>   #else
> >>   #define PAGE_OFFSET        _AC(CONFIG_PAGE_OFFSET, UL)
> >> diff --git a/arch/riscv/include/asm/pgalloc.h 
> b/arch/riscv/include/asm/pgalloc.h
> >> index 11823004b87a..947f23d7b6af 100644
> >> --- a/arch/riscv/include/asm/pgalloc.h
> >> +++ b/arch/riscv/include/asm/pgalloc.h
> >> @@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct 
> mm_struct *mm, p4d_t *p4d,
> >>       }
> >>   }
> >>   +static inline void pgd_populate(struct mm_struct *mm, pgd_t 
> *pgd, p4d_t *p4d)
> >> +{
> >> +    if (pgtable_l5_enabled) {
> >> +        unsigned long pfn = virt_to_pfn(p4d);
> >> +
> >> +        set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> >> +    }
> >> +}
> >> +
> >> +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t 
> *pgd,
> >> +                     p4d_t *p4d)
> >> +{
> >> +    if (pgtable_l5_enabled) {
> >> +        unsigned long pfn = virt_to_pfn(p4d);
> >> +
> >> +        set_pgd_safe(pgd,
> >> +                 __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> >> +    }
> >> +}
> >> +
> >>   #define pud_alloc_one pud_alloc_one
> >>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned 
> long addr)
> >>   {
> >> @@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct 
> *mm, pud_t *pud)
> >>   }
> >>     #define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
> >> +
> >> +#define p4d_alloc_one p4d_alloc_one
> >> +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned 
> long addr)
> >> +{
> >> +    if (pgtable_l5_enabled) {
> >> +        gfp_t gfp = GFP_PGTABLE_USER;
> >> +
> >> +        if (mm == &init_mm)
> >> +            gfp = GFP_PGTABLE_KERNEL;
> >> +        return (p4d_t *)get_zeroed_page(gfp);
> >> +    }
> >> +
> >> +    return NULL;
> >> +}
> >> +
> >> +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
> >> +{
> >> +    BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
> >> +    free_page((unsigned long)p4d);
> >> +}
> >> +
> >> +#define p4d_free p4d_free
> >> +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
> >> +{
> >> +    if (pgtable_l5_enabled)
> >> +        __p4d_free(mm, p4d);
> >> +}
> >> +
> >> +#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
> >>   #endif /* __PAGETABLE_PMD_FOLDED */
> >>     static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> >> diff --git a/arch/riscv/include/asm/pgtable-64.h 
> b/arch/riscv/include/asm/pgtable-64.h
> >> index bbbdd66e5e2f..a01386d4094f 100644
> >> --- a/arch/riscv/include/asm/pgtable-64.h
> >> +++ b/arch/riscv/include/asm/pgtable-64.h
> >> @@ -9,16 +9,24 @@
> >>   #include <linux/const.h>
> >>     extern bool pgtable_l4_enabled;
> >> +extern bool pgtable_l5_enabled;
> >>     #define PGDIR_SHIFT_L3  30
> >>   #define PGDIR_SHIFT_L4  39
> >> +#define PGDIR_SHIFT_L5  48
> >>   #define PGDIR_SIZE_L3   (_AC(1, UL) << PGDIR_SHIFT_L3)
> >>   -#define PGDIR_SHIFT     (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : 
> PGDIR_SHIFT_L3)
> >> +#define PGDIR_SHIFT     (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
> >> +        (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
> >
> >
> > This syntax is very cumbersome, the best I could come up with is the 
> following macro:
> >
> > #define pgtable_level(l3, l4, l5) (pgtable_l5_enabled ? (l5): 
> (pgtable_l4_enabled ? (l4): (l3))
> >
> > And I'm wondering if a single variable that contains the number of 
> page table levels would not be better actually, any idea?
> >
>
> Yes, it is a good idea to use a macro like pgtable_level. But a 
> variable containing
> the number of page table levels may be better. What about using such 
> variable
> for array index like this:
>
> static int64_t page_table_level = 5; // set sv57 defaultly
> #define PGTABLE_LEVEL_OFFSET 3
> static int64_t pgdir_shift[3] = {PGDIR_SHIFT_L3, PGDIR_SHIFT_L4, 
> PGDIR_SHIFT_L5};
> #define PGDIR_SHIFT pgdir_shift[page_table_level - PGTABLE_LEVEL_OFFSET]


I have just updated the sv48 patchset without this suggestion as it 
already contains some significant changes. I'm not opposed to your idea, 
but I have a preference for the macro as it does not use extra memory, 
but do as you want and we'll see what others think too.

Thanks,

Alex


>
> >
> >>
> >>   #endif /* __PAGETABLE_PMD_FOLDED */
> >> @@ -627,6 +716,13 @@ static __init pgprot_t 
> pgprot_from_va(uintptr_t va)
> >>   #endif /* CONFIG_STRICT_KERNEL_RWX */
> >>     #ifdef CONFIG_64BIT
> >> +static void __init disable_pgtable_l5(void)
> >> +{
> >> +    pgtable_l5_enabled = false;
> >> +    kernel_map.page_offset = PAGE_OFFSET_L4;
> >> +    satp_mode = SATP_MODE_48;
> >> +}
> >> +
> >>   static void __init disable_pgtable_l4(void)
> >>   {
> >>       pgtable_l4_enabled = false;
> >> @@ -643,8 +739,9 @@ static void __init disable_pgtable_l4(void)
> >>   static __init void set_satp_mode(uintptr_t dtb_pa)
> >>   {
> >>       u64 identity_satp, hw_satp;
> >> -    uintptr_t set_satp_mode_pmd;
> >> +    uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & 
> PMD_MASK;
> >>       int cpus_node;
> >> +    bool check_l4 = false;
> >>         /* Check if the user asked for sv39 explicitly in the 
> device tree */
> >>       cpus_node = fdt_path_offset((void *)dtb_pa, "/cpus");
> >> @@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t 
> dtb_pa)
> >>                   continue;
> >>                 if (!strcmp(mmu_type, "riscv,sv39")) {
> >> +                disable_pgtable_l5();
> >>                   disable_pgtable_l4();
> >>                   return;
> >>               }
> >>   +            if (!strcmp(mmu_type, "riscv,sv48")) {
> >> +                check_l4 = true;
> >> +            }
> >> +
> >
> >
> > If sv48 is set in the device tree, why would you test if it is 
> supported below? I would take it as is, just like for sv39, I'm not 
> sure we want to override this silently and make a wrong device tree work.
> >
>
> Actually I prefer to emit a big WARN when sv48 is set in device tree
> but unavailable. I think the device tree field shows the page table level
> which developers expect, and the kernel should try it best to boot itself
> even when the cpu doesn't meet that expectation ? As for sv39, I think 
> any
> RISC-V 64-bit device's mmu (if exists) should support it, so we don't 
> need
> to test it
>
>
> >
> >> break;
> >>           }
> >>       }
> >>   -    set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
> >> +retry:
> >> +    if (check_l4)
> >> +        disable_pgtable_l5();
> >> +
> >>       create_pgd_mapping(early_pg_dir,
> >> -               set_satp_mode_pmd, (uintptr_t)early_pud,
> >> +               set_satp_mode_pmd,
> >> +               check_l4 ? (uintptr_t)early_pud : 
> (uintptr_t)early_p4d,
> >>                  PGDIR_SIZE, PAGE_TABLE);
> >> +    if (!check_l4)
> >> +        create_p4d_mapping(early_p4d,
> >> +                set_satp_mode_pmd, (uintptr_t)early_pud,
> >> +                P4D_SIZE, PAGE_TABLE);
> >>       create_pud_mapping(early_pud,
> >>                  set_satp_mode_pmd, (uintptr_t)early_pmd,
> >>                  PUD_SIZE, PAGE_TABLE);
> >> @@ -689,10 +799,16 @@ static __init void set_satp_mode(uintptr_t 
> dtb_pa)
> >>       hw_satp = csr_swap(CSR_SATP, 0ULL);
> >>       local_flush_tlb_all();
> >>   -    if (hw_satp != identity_satp)
> >> +    if (hw_satp != identity_satp) {
> >> +        if (!check_l4) {
> >> +            check_l4 = true;
> >> +            goto retry;
> >> +        }
> >>           disable_pgtable_l4();
> >> +    }
> >>         memset(early_pg_dir, 0, PAGE_SIZE);
> >> +    memset(early_p4d, 0, PAGE_SIZE);
> >>       memset(early_pud, 0, PAGE_SIZE);
> >>       memset(early_pmd, 0, PAGE_SIZE);
> >>   }
> >> @@ -766,6 +882,10 @@ static void __init 
> create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
> >>                  PGDIR_SIZE,
> >>                  IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
> >>   +    if (pgtable_l5_enabled)
> >> +        create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
> >> +                   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
> >> +
> >>       if (pgtable_l4_enabled)
> >>           create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
> >>                      (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
> >> @@ -802,6 +922,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >>       pt_ops.get_pmd_virt = get_pmd_virt_early;
> >>       pt_ops.alloc_pud = alloc_pud_early;
> >>       pt_ops.get_pud_virt = get_pud_virt_early;
> >> +    pt_ops.alloc_p4d = alloc_p4d_early;
> >> +    pt_ops.get_p4d_virt = get_p4d_virt_early;
> >>   #endif
> >>         kernel_map.virt_addr = KERNEL_LINK_ADDR;
> >> @@ -855,6 +977,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >>                  fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> >>     #ifndef __PAGETABLE_PMD_FOLDED
> >> +    /* Setup fixmap P4D and PUD */
> >> +    if (pgtable_l5_enabled)
> >> +        create_p4d_mapping(fixmap_p4d, FIXADDR_START,
> >> +                   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
> >>       /* Setup fixmap PUD and PMD */
> >>       if (pgtable_l4_enabled)
> >>           create_pud_mapping(fixmap_pud, FIXADDR_START,
> >> @@ -864,6 +990,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >>       /* Setup trampoline PGD and PMD */
> >>       create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
> >>                  trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> >> +    if (pgtable_l5_enabled)
> >> +        create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
> >> +                   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
> >>       if (pgtable_l4_enabled)
> >>           create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
> >>                      (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
> >> @@ -938,6 +1067,8 @@ static void __init setup_vm_final(void)
> >>       pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
> >>       pt_ops.alloc_pud = alloc_pud_fixmap;
> >>       pt_ops.get_pud_virt = get_pud_virt_fixmap;
> >> +    pt_ops.alloc_p4d = alloc_p4d_fixmap;
> >> +    pt_ops.get_p4d_virt = get_p4d_virt_fixmap;
> >>   #endif
> >>       /* Setup swapper PGD for fixmap */
> >>       create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
> >> @@ -985,6 +1116,8 @@ static void __init setup_vm_final(void)
> >>       pt_ops.get_pmd_virt = get_pmd_virt_late;
> >>       pt_ops.alloc_pud = alloc_pud_late;
> >>       pt_ops.get_pud_virt = get_pud_virt_late;
> >> +    pt_ops.alloc_p4d = alloc_p4d_late;
> >> +    pt_ops.get_p4d_virt = get_p4d_virt_late;
> >>   #endif
> >>   }
> >>   #else
> >
> >
> > You forgot to handle kasan in this patch. Actually, I'm updating 
> kasan for the sv48 patchset after commit 54c5639d8f50 ("riscv: Fix 
> asan-stack clang build") broke it. I'm struggling a bit as the kasan 
> offset that is known at compile time must be the same for sv39, s48 
> and sv57, so we have to move the kasan region next to the kernel, but 
> then it is not aligned on pgdir boundaries for sv48 and sv57, so the 
> current kasan population functions must be adapted.
> >
> > Anyway, I would advise you to wait for my updated patchset before 
> tackling kasan for sv57.
>
>
> Thanks for your suggestions. I will handle kasan in next patchset 
> which should
> be based on your new patchset.
>
> Thanks,
> Qinglin
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation
  2021-11-24 11:18 [PATCH " panqinglin2020
@ 2021-11-24 11:18 ` panqinglin2020
  0 siblings, 0 replies; 9+ messages in thread
From: panqinglin2020 @ 2021-11-24 11:18 UTC (permalink / raw)
  To: paul.walmsley, palmer, aou, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan

From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

This patch adds Sv57 implementation on the top of Alex's Sv48 patchset.
The mmu configuration will be determined on runtime, according to both
mmu HW support and mmu-type field in the dtb. The kernel will try to
set satp mode one by one from the configuration item to Sv39 in 64bit.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
---
 arch/riscv/Kconfig                  |   4 +-
 arch/riscv/include/asm/csr.h        |   1 +
 arch/riscv/include/asm/fixmap.h     |   1 +
 arch/riscv/include/asm/page.h       |   1 +
 arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
 arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
 arch/riscv/include/asm/pgtable.h    |   4 +-
 arch/riscv/kernel/cpu.c             |   4 +-
 arch/riscv/mm/init.c                | 169 +++++++++++++++++++++++++---
 9 files changed, 312 insertions(+), 24 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a4cadcd4e785..aac28e96d0e5 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -151,7 +151,7 @@ config PAGE_OFFSET
 	hex
 	default 0xC0000000 if 32BIT
 	default 0x80000000 if 64BIT && !MMU
-	default 0xffffc00000000000 if 64BIT
+	default 0xff80000000000000 if 64BIT
 
 config ARCH_FLATMEM_ENABLE
 	def_bool !NUMA
@@ -196,7 +196,7 @@ config FIX_EARLYCON_MEM
 
 config PGTABLE_LEVELS
 	int
-	default 4 if 64BIT
+	default 5 if 64BIT
 	default 2
 
 config LOCKDEP_SUPPORT
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index ae711692eec9..299abdef0cd6 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -47,6 +47,7 @@
 #define SATP_PPN	_AC(0x00000FFFFFFFFFFF, UL)
 #define SATP_MODE_39	_AC(0x8000000000000000, UL)
 #define SATP_MODE_48	_AC(0x9000000000000000, UL)
+#define SATP_MODE_57	_AC(0xa000000000000000, UL)
 #define SATP_ASID_BITS	16
 #define SATP_ASID_SHIFT	44
 #define SATP_ASID_MASK	_AC(0xFFFF, UL)
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 58a718573ad6..3cfece8b6568 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -25,6 +25,7 @@ enum fixed_addresses {
 	FIX_PTE,
 	FIX_PMD,
 	FIX_PUD,
+	FIX_P4D,
 	FIX_TEXT_POKE1,
 	FIX_TEXT_POKE0,
 	FIX_EARLYCON_MEM_BASE,
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 63334568a10e..41e0d88234d5 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -37,6 +37,7 @@
  * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
  * define the PAGE_OFFSET value for SV39.
  */
+#define PAGE_OFFSET_L4		_AC(0xffffc00000000000, UL)
 #define PAGE_OFFSET_L3		_AC(0xffffffe000000000, UL)
 #else
 #define PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 11823004b87a..947f23d7b6af 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
 	}
 }
 
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
+{
+	if (pgtable_l5_enabled) {
+		unsigned long pfn = virt_to_pfn(p4d);
+
+		set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+	}
+}
+
+static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
+				     p4d_t *p4d)
+{
+	if (pgtable_l5_enabled) {
+		unsigned long pfn = virt_to_pfn(p4d);
+
+		set_pgd_safe(pgd,
+			     __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+	}
+}
+
 #define pud_alloc_one pud_alloc_one
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 }
 
 #define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
+
+#define p4d_alloc_one p4d_alloc_one
+static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	if (pgtable_l5_enabled) {
+		gfp_t gfp = GFP_PGTABLE_USER;
+
+		if (mm == &init_mm)
+			gfp = GFP_PGTABLE_KERNEL;
+		return (p4d_t *)get_zeroed_page(gfp);
+	}
+
+	return NULL;
+}
+
+static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+	BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
+	free_page((unsigned long)p4d);
+}
+
+#define p4d_free p4d_free
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+	if (pgtable_l5_enabled)
+		__p4d_free(mm, p4d);
+}
+
+#define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
 #endif /* __PAGETABLE_PMD_FOLDED */
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index bbbdd66e5e2f..a01386d4094f 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -9,16 +9,24 @@
 #include <linux/const.h>
 
 extern bool pgtable_l4_enabled;
+extern bool pgtable_l5_enabled;
 
 #define PGDIR_SHIFT_L3  30
 #define PGDIR_SHIFT_L4  39
+#define PGDIR_SHIFT_L5  48
 #define PGDIR_SIZE_L3   (_AC(1, UL) << PGDIR_SHIFT_L3)
 
-#define PGDIR_SHIFT     (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
+#define PGDIR_SHIFT     (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
+		(pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
 /* Size of region mapped by a page global directory */
 #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
 
+/* p4d is folded into pgd in case of 4-level page table */
+#define P4D_SHIFT      39
+#define P4D_SIZE       (_AC(1, UL) << P4D_SHIFT)
+#define P4D_MASK       (~(P4D_SIZE - 1))
+
 /* pud is folded into pgd in case of 3-level page table */
 #define PUD_SHIFT      30
 #define PUD_SIZE       (_AC(1, UL) << PUD_SHIFT)
@@ -29,6 +37,15 @@ extern bool pgtable_l4_enabled;
 #define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK        (~(PMD_SIZE - 1))
 
+/* Page 4th Directory entry */
+typedef struct {
+	unsigned long p4d;
+} p4d_t;
+
+#define p4d_val(x)	((x).p4d)
+#define __p4d(x)	((p4d_t) { (x) })
+#define PTRS_PER_P4D	(PAGE_SIZE / sizeof(p4d_t))
+
 /* Page Upper Directory entry */
 typedef struct {
 	unsigned long pud;
@@ -99,6 +116,15 @@ static inline struct page *pud_page(pud_t pud)
 	return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
 }
 
+#define mm_p4d_folded  mm_p4d_folded
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+	if (pgtable_l5_enabled)
+		return false;
+
+	return true;
+}
+
 #define mm_pud_folded  mm_pud_folded
 static inline bool mm_pud_folded(struct mm_struct *mm)
 {
@@ -128,6 +154,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
 #define pud_ERROR(e)   \
 	pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
 
+#define p4d_ERROR(e)   \
+	pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
+
 static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
 	if (pgtable_l4_enabled)
@@ -166,6 +195,16 @@ static inline void p4d_clear(p4d_t *p4d)
 		set_p4d(p4d, __p4d(0));
 }
 
+static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
+{
+	return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _p4d_pfn(p4d_t p4d)
+{
+	return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
+}
+
 static inline pud_t *p4d_pgtable(p4d_t p4d)
 {
 	if (pgtable_l4_enabled)
@@ -190,4 +229,66 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 	return (pud_t *)p4d;
 }
 
+static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		*pgdp = pgd;
+	else
+		set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
+}
+
+static inline int pgd_none(pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		return (pgd_val(pgd) == 0);
+
+	return 0;
+}
+
+static inline int pgd_present(pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		return (pgd_val(pgd) & _PAGE_PRESENT);
+
+	return 1;
+}
+
+static inline int pgd_bad(pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		return !pgd_present(pgd);
+
+	return 0;
+}
+
+static inline void pgd_clear(pgd_t *pgd)
+{
+	if (pgtable_l5_enabled)
+		set_pgd(pgd, __pgd(0));
+}
+
+static inline p4d_t *pgd_pgtable(pgd_t pgd)
+{
+	if (pgtable_l5_enabled)
+		return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+
+	return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
+}
+
+static inline struct page *pgd_page(pgd_t pgd)
+{
+	return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+}
+
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+
+#define p4d_offset p4d_offset
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+{
+	if (pgtable_l5_enabled)
+		return pgd_pgtable(*pgd) + p4d_index(address);
+
+	return (p4d_t *)pgd;
+}
+
 #endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 152fc5454c02..922cc436b36b 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -49,7 +49,8 @@
  * position vmemmap directly below the VMALLOC region.
  */
 #ifdef CONFIG_64BIT
-#define VA_BITS		(pgtable_l4_enabled ? 48 : 39)
+#define VA_BITS		(pgtable_l5_enabled ? \
+				57 : (pgtable_l4_enabled ? 48 : 39))
 #else
 #define VA_BITS		32
 #endif
@@ -89,7 +90,6 @@
 
 #ifndef __ASSEMBLY__
 
-#include <asm-generic/pgtable-nop4d.h>
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 #include <linux/mm_types.h>
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index dea9b1c31889..e190bd205376 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -78,7 +78,9 @@ static void print_mmu(struct seq_file *f)
 #if defined(CONFIG_32BIT)
 	strncpy(sv_type, "sv32", 5);
 #elif defined(CONFIG_64BIT)
-	if (pgtable_l4_enabled)
+	if (pgtable_l5_enabled)
+		strncpy(sv_type, "sv57", 5);
+	else if (pgtable_l4_enabled)
 		strncpy(sv_type, "sv48", 5);
 	else
 		strncpy(sv_type, "sv39", 5);
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 0945ac9ed682..81822db8dd10 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -38,12 +38,15 @@ EXPORT_SYMBOL(kernel_map);
 #endif
 
 #ifdef CONFIG_64BIT
-u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
+u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
 #else
 u64 satp_mode = SATP_MODE_32;
 #endif
 EXPORT_SYMBOL(satp_mode);
 
+bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL) ?
+				true : false;
+EXPORT_SYMBOL(pgtable_l5_enabled);
 bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL) ?
 				true : false;
 EXPORT_SYMBOL(pgtable_l4_enabled);
@@ -72,6 +75,8 @@ struct pt_alloc_ops {
 	phys_addr_t (*alloc_pmd)(uintptr_t va);
 	pud_t *(*get_pud_virt)(phys_addr_t pa);
 	phys_addr_t (*alloc_pud)(uintptr_t va);
+	p4d_t *(*get_p4d_virt)(phys_addr_t pa);
+	phys_addr_t (*alloc_p4d)(uintptr_t va);
 #endif
 };
 
@@ -285,6 +290,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
 
 pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
 static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 
@@ -374,6 +380,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 #define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
 #endif /* CONFIG_XIP_KERNEL */
 
+static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
+static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
+#define fixmap_p4d     ((p4d_t *)XIP_FIXUP(fixmap_p4d))
+#define early_p4d      ((p4d_t *)XIP_FIXUP(early_p4d))
+#endif /* CONFIG_XIP_KERNEL */
+
 static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
 static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
 static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
@@ -487,6 +503,44 @@ static phys_addr_t alloc_pud_late(uintptr_t va)
 	return __pa(vaddr);
 }
 
+static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
+{
+	return (p4d_t *)((uintptr_t)pa);
+}
+
+static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
+{
+	clear_fixmap(FIX_P4D);
+	return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
+}
+
+static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
+{
+	return (p4d_t *)__va(pa);
+}
+
+static phys_addr_t __init alloc_p4d_early(uintptr_t va)
+{
+	/* Only one P4D is available for early mapping */
+	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
+
+	return (uintptr_t)early_p4d;
+}
+
+static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
+{
+	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static phys_addr_t alloc_p4d_late(uintptr_t va)
+{
+	unsigned long vaddr;
+
+	vaddr = __get_free_page(GFP_KERNEL);
+	BUG_ON(!vaddr);
+	return __pa(vaddr);
+}
+
 static void __init create_pud_mapping(pud_t *pudp,
 				      uintptr_t va, phys_addr_t pa,
 				      phys_addr_t sz, pgprot_t prot)
@@ -514,21 +568,55 @@ static void __init create_pud_mapping(pud_t *pudp,
 	create_pmd_mapping(nextp, va, pa, sz, prot);
 }
 
-#define pgd_next_t		pud_t
-#define alloc_pgd_next(__va)	(pgtable_l4_enabled ?			\
-		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
-#define get_pgd_next_virt(__pa)	(pgtable_l4_enabled ?			\
-		pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
+static void __init create_p4d_mapping(p4d_t *p4dp,
+				      uintptr_t va, phys_addr_t pa,
+				      phys_addr_t sz, pgprot_t prot)
+{
+	pud_t *nextp;
+	phys_addr_t next_phys;
+	uintptr_t p4d_index = p4d_index(va);
+
+	if (sz == P4D_SIZE) {
+		if (p4d_val(p4dp[p4d_index]) == 0)
+			p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
+		return;
+	}
+
+	if (p4d_val(p4dp[p4d_index]) == 0) {
+		next_phys = pt_ops.alloc_pud(va);
+		p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
+		nextp = pt_ops.get_pud_virt(next_phys);
+		memset(nextp, 0, PAGE_SIZE);
+	} else {
+		next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
+		nextp = pt_ops.get_pud_virt(next_phys);
+	}
+
+	create_pud_mapping(nextp, va, pa, sz, prot);
+}
+
+#define pgd_next_t		p4d_t
+#define alloc_pgd_next(__va)	(pgtable_l5_enabled ?			\
+		pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ?		\
+		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
+#define get_pgd_next_virt(__pa)	(pgtable_l5_enabled ?			\
+		pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ?	\
+		pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
+				(pgtable_l5_enabled ?			\
+		create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
 				(pgtable_l4_enabled ?			\
-		create_pud_mapping(__nextp, __va, __pa, __sz, __prot) :	\
-		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
-#define fixmap_pgd_next		(pgtable_l4_enabled ?			\
-		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
-#define trampoline_pgd_next	(pgtable_l4_enabled ?			\
-		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
-#define early_dtb_pgd_next	(pgtable_l4_enabled ?			\
-		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
+		create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) :	\
+		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
+#define fixmap_pgd_next		(pgtable_l5_enabled ?			\
+		(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ?		\
+		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
+#define trampoline_pgd_next	(pgtable_l5_enabled ?			\
+		(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ?	\
+		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
+#define early_dtb_pgd_next	(pgtable_l5_enabled ?			\
+		(uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ?	\
+		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
 #else
 #define pgd_next_t		pte_t
 #define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
@@ -537,6 +625,7 @@ static void __init create_pud_mapping(pud_t *pudp,
 	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
 #define fixmap_pgd_next		((uintptr_t)fixmap_pte)
 #define early_dtb_pgd_next	((uintptr_t)early_dtb_pmd)
+#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot)
 #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
 #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
 #endif /* __PAGETABLE_PMD_FOLDED */
@@ -627,6 +716,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
 #endif /* CONFIG_STRICT_KERNEL_RWX */
 
 #ifdef CONFIG_64BIT
+static void __init disable_pgtable_l5(void)
+{
+	pgtable_l5_enabled = false;
+	kernel_map.page_offset = PAGE_OFFSET_L4;
+	satp_mode = SATP_MODE_48;
+}
+
 static void __init disable_pgtable_l4(void)
 {
 	pgtable_l4_enabled = false;
@@ -643,8 +739,9 @@ static void __init disable_pgtable_l4(void)
 static __init void set_satp_mode(uintptr_t dtb_pa)
 {
 	u64 identity_satp, hw_satp;
-	uintptr_t set_satp_mode_pmd;
+	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
 	int cpus_node;
+	bool check_l4 = false;
 
 	/* Check if the user asked for sv39 explicitly in the device tree */
 	cpus_node = fdt_path_offset((void *)dtb_pa, "/cpus");
@@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
 				continue;
 
 			if (!strcmp(mmu_type, "riscv,sv39")) {
+				disable_pgtable_l5();
 				disable_pgtable_l4();
 				return;
 			}
 
+			if (!strcmp(mmu_type, "riscv,sv48")) {
+				check_l4 = true;
+			}
+
 			break;
 		}
 	}
 
-	set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
+retry:
+	if (check_l4)
+		disable_pgtable_l5();
+
 	create_pgd_mapping(early_pg_dir,
-			   set_satp_mode_pmd, (uintptr_t)early_pud,
+			   set_satp_mode_pmd,
+			   check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
 			   PGDIR_SIZE, PAGE_TABLE);
+	if (!check_l4)
+		create_p4d_mapping(early_p4d,
+				set_satp_mode_pmd, (uintptr_t)early_pud,
+				P4D_SIZE, PAGE_TABLE);
 	create_pud_mapping(early_pud,
 			   set_satp_mode_pmd, (uintptr_t)early_pmd,
 			   PUD_SIZE, PAGE_TABLE);
@@ -689,10 +799,16 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
 	hw_satp = csr_swap(CSR_SATP, 0ULL);
 	local_flush_tlb_all();
 
-	if (hw_satp != identity_satp)
+	if (hw_satp != identity_satp) {
+		if (!check_l4) {
+			check_l4 = true;
+			goto retry;
+		}
 		disable_pgtable_l4();
+	}
 
 	memset(early_pg_dir, 0, PAGE_SIZE);
+	memset(early_p4d, 0, PAGE_SIZE);
 	memset(early_pud, 0, PAGE_SIZE);
 	memset(early_pmd, 0, PAGE_SIZE);
 }
@@ -766,6 +882,10 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
 			   PGDIR_SIZE,
 			   IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
 
+	if (pgtable_l5_enabled)
+		create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
+				   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
+
 	if (pgtable_l4_enabled)
 		create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
 				   (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
@@ -802,6 +922,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	pt_ops.get_pmd_virt = get_pmd_virt_early;
 	pt_ops.alloc_pud = alloc_pud_early;
 	pt_ops.get_pud_virt = get_pud_virt_early;
+	pt_ops.alloc_p4d = alloc_p4d_early;
+	pt_ops.get_p4d_virt = get_p4d_virt_early;
 #endif
 
 	kernel_map.virt_addr = KERNEL_LINK_ADDR;
@@ -855,6 +977,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 			   fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
 
 #ifndef __PAGETABLE_PMD_FOLDED
+	/* Setup fixmap P4D and PUD */
+	if (pgtable_l5_enabled)
+		create_p4d_mapping(fixmap_p4d, FIXADDR_START,
+				   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
 	/* Setup fixmap PUD and PMD */
 	if (pgtable_l4_enabled)
 		create_pud_mapping(fixmap_pud, FIXADDR_START,
@@ -864,6 +990,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	/* Setup trampoline PGD and PMD */
 	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
 			   trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+	if (pgtable_l5_enabled)
+		create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
+				   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
 	if (pgtable_l4_enabled)
 		create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
 				   (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
@@ -938,6 +1067,8 @@ static void __init setup_vm_final(void)
 	pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
 	pt_ops.alloc_pud = alloc_pud_fixmap;
 	pt_ops.get_pud_virt = get_pud_virt_fixmap;
+	pt_ops.alloc_p4d = alloc_p4d_fixmap;
+	pt_ops.get_p4d_virt = get_p4d_virt_fixmap;
 #endif
 	/* Setup swapper PGD for fixmap */
 	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
@@ -985,6 +1116,8 @@ static void __init setup_vm_final(void)
 	pt_ops.get_pmd_virt = get_pmd_virt_late;
 	pt_ops.alloc_pud = alloc_pud_late;
 	pt_ops.get_pud_virt = get_pud_virt_late;
+	pt_ops.alloc_p4d = alloc_p4d_late;
+	pt_ops.get_p4d_virt = get_p4d_virt_late;
 #endif
 }
 #else
-- 
2.32.0


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-12-06 10:53 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-24 11:20 [RESEND PATCH v1 0/2] Add Sv57 page table support panqinglin2020
2021-11-24 11:20 ` [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation panqinglin2020
2021-11-29 11:20   ` Alexandre ghiti
2021-11-29 12:18     ` Heiko Stübner
2021-11-30  3:11     ` 潘庆霖
2021-12-06 10:53       ` Alexandre ghiti
2021-11-24 11:20 ` [PATCH v1 2/2] Documentation, mm: Add Sv57 vm layout documentation panqinglin2020
2021-11-24 23:29 ` [RESEND PATCH v1 0/2] Add Sv57 page table support Heiko Stübner
  -- strict thread matches above, loose matches on Subject: below --
2021-11-24 11:18 [PATCH " panqinglin2020
2021-11-24 11:18 ` [PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation panqinglin2020

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.