All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime
@ 2022-08-22 15:34 panqinglin2020
  2022-08-22 15:34 ` [PATCH v4 1/4] mm: modify pte format for Svnapot panqinglin2020
                   ` (4 more replies)
  0 siblings, 5 replies; 13+ messages in thread
From: panqinglin2020 @ 2022-08-22 15:34 UTC (permalink / raw)
  To: palmer, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan

From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

Svnapot is a RISC-V extension for marking contiguous 4K pages as a non-4K
page. This patch set is for using Svnapot in Linux Kernel's boot process
and hugetlb fs.

This patchset adds a Kconfig item for using Svnapot in
"Platform type"->"Svnapot support". Its default value is off, and people can turn
it on if they want the kernel to detect Svnapot hardware support and leverage it.

Tested on:
  - qemu rv64 with "Svnapot support" off and svnapot=true.
  - qemu rv64 with "Svnapot support" on and svnapot=true.
  - qemu rv64 with "Svnapot support" off and svnapot=false.
  - qemu rv64 with "Svnapot support" on and svnapot=false.

Changes in v2:
  - detect Svnapot hardware support at boot time.
Changes in v3:
  - do linear mapping again if has_svnapot
Changes in v4:
  - fix some errors/warns reported by checkpatch.pl, thanks @Conor

Qinglin Pan (4):
  mm: modify pte format for Svnapot
  mm: support Svnapot in physical page linear-mapping
  mm: support Svnapot in hugetlb page
  mm: support Svnapot in huge vmap

 arch/riscv/Kconfig                   |  10 +-
 arch/riscv/include/asm/errata_list.h |  24 ++-
 arch/riscv/include/asm/hugetlb.h     |  30 +++-
 arch/riscv/include/asm/hwcap.h       |   1 +
 arch/riscv/include/asm/mmu.h         |   1 +
 arch/riscv/include/asm/page.h        |   2 +-
 arch/riscv/include/asm/pgtable-64.h  |  14 ++
 arch/riscv/include/asm/pgtable.h     |  70 +++++++-
 arch/riscv/include/asm/vmalloc.h     |  22 +++
 arch/riscv/kernel/cpu.c              |   1 +
 arch/riscv/kernel/cpufeature.c       |  18 ++
 arch/riscv/kernel/setup.c            |  11 +-
 arch/riscv/mm/hugetlbpage.c          | 250 ++++++++++++++++++++++++++-
 arch/riscv/mm/init.c                 |  47 +++--
 14 files changed, 477 insertions(+), 24 deletions(-)

-- 
2.35.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH v4 1/4] mm: modify pte format for Svnapot
  2022-08-22 15:34 [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime panqinglin2020
@ 2022-08-22 15:34 ` panqinglin2020
  2022-08-22 20:45   ` Conor.Dooley
                     ` (2 more replies)
  2022-08-22 15:34 ` [PATCH v4 2/4] mm: support Svnapot in physical page linear-mapping panqinglin2020
                   ` (3 subsequent siblings)
  4 siblings, 3 replies; 13+ messages in thread
From: panqinglin2020 @ 2022-08-22 15:34 UTC (permalink / raw)
  To: palmer, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan

From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

This commit adds two errata to enable/disable svnapot support, patching code
dynamically when "svnapot" is in the "riscv,isa" field of the fdt and the SVNAPOT
compile option is set. It influences the behavior of the has_svnapot
function and the pte_pfn function. All code dependent on svnapot should first
make sure that has_svnapot returns true.

Also, this commit modifies the PTE definition for Svnapot, and creates some
functions in pgtable.h to mark a PTE as napot and check whether it is a Svnapot
PTE. Until now, only the 64KB napot size is supported in the draft spec, so some
macros have only a 64KB version.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index ed66c31e4655..c43708ae7f38 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -432,6 +432,13 @@ config FPU
 
 	  If you don't know what to do here, say Y.
 
+config SVNAPOT
+	bool "Svnapot support"
+	default n
+	help
+	  Select if your CPU supports Svnapot and you want to enable it when
+	  kernel is booting.
+
 endmenu # "Platform type"
 
 menu "Kernel features"
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 19a771085781..f22723174cd9 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -22,7 +22,8 @@
 
 #define	CPUFEATURE_SVPBMT 0
 #define	CPUFEATURE_ZICBOM 1
-#define	CPUFEATURE_NUMBER 2
+#define	CPUFEATURE_SVNAPOT 2
+#define	CPUFEATURE_NUMBER 3
 
 #ifdef __ASSEMBLY__
 
@@ -142,6 +143,27 @@ asm volatile(ALTERNATIVE_2(						\
 	    "r"((unsigned long)(_start) + (_size))			\
 	: "a0")
 
+#define ALT_SVNAPOT(_val)						\
+asm(ALTERNATIVE("li %0, 0", "li %0, 1", 0,				\
+		CPUFEATURE_SVNAPOT, CONFIG_SVNAPOT)			\
+		: "=r"(_val) :)
+
+#define ALT_SVNAPOT_PTE_PFN(_val, _napot_shift, _pfn_mask, _pfn_shift)	\
+asm(ALTERNATIVE("and %0, %1, %2\n\t"					\
+		"srli %0, %0, %3\n\t"					\
+		"nop\n\tnop\n\tnop",					\
+		"srli t3, %1, %4\n\t"					\
+		"and %0, %1, %2\n\t"					\
+		"srli %0, %0, %3\n\t"					\
+		"sub  t4, %0, t3\n\t"					\
+		"and  %0, %0, t4",					\
+		0, CPUFEATURE_SVNAPOT, CONFIG_SVNAPOT)			\
+		: "+r"(_val)						\
+		: "r"(_val),						\
+		  "r"(_pfn_mask),					\
+		  "i"(_pfn_shift),					\
+		  "i"(_napot_shift))
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 6f59ec64175e..e4c7ce5a7e1a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -58,6 +58,7 @@ enum riscv_isa_ext_id {
 	RISCV_ISA_EXT_ZICBOM,
 	RISCV_ISA_EXT_ZIHINTPAUSE,
 	RISCV_ISA_EXT_SSTC,
+	RISCV_ISA_EXT_SVNAPOT,
 	RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
 };
 
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index dc42375c2357..a23b71cf5979 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -74,6 +74,20 @@ typedef struct {
  */
 #define _PAGE_PFN_MASK  GENMASK(53, 10)
 
+/*
+ * [63] Svnapot definitions:
+ * 0 Svnapot disabled
+ * 1 Svnapot enabled
+ */
+#define _PAGE_NAPOT_SHIFT 63
+#define _PAGE_NAPOT      (1UL << _PAGE_NAPOT_SHIFT)
+#define NAPOT_CONT64KB_ORDER 4UL
+#define NAPOT_CONT64KB_SHIFT (NAPOT_CONT64KB_ORDER + PAGE_SHIFT)
+#define NAPOT_CONT64KB_SIZE (1UL << NAPOT_CONT64KB_SHIFT)
+#define NAPOT_CONT64KB_MASK (NAPOT_CONT64KB_SIZE - 1)
+#define NAPOT_64KB_PTE_NUM (1UL << NAPOT_CONT64KB_ORDER)
+#define NAPOT_64KB_MASK (7UL << _PAGE_PFN_SHIFT)
+
 /*
  * [62:61] Svpbmt Memory Type definitions:
  *
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7ec936910a96..37547dd04010 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -264,10 +264,41 @@ static inline pte_t pud_pte(pud_t pud)
 	return __pte(pud_val(pud));
 }
 
+static inline bool has_svnapot(void)
+{
+	u64 _val;
+
+	ALT_SVNAPOT(_val);
+	return _val;
+}
+
+#ifdef CONFIG_SVNAPOT
+
+static inline unsigned long pte_napot(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_NAPOT;
+}
+
+static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
+{
+	unsigned long napot_bits = (1UL << (order - 1)) << _PAGE_PFN_SHIFT;
+	unsigned long lower_prot =
+		pte_val(pte) & ((1UL << _PAGE_PFN_SHIFT) - 1UL);
+	unsigned long upper_prot = (pte_val(pte) >> _PAGE_PFN_SHIFT)
+				   << _PAGE_PFN_SHIFT;
+
+	return __pte(upper_prot | napot_bits | lower_prot | _PAGE_NAPOT);
+}
+#endif /* CONFIG_SVNAPOT */
+
 /* Yields the page frame number (PFN) of a page table entry */
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return __page_val_to_pfn(pte_val(pte));
+	unsigned long _val  = pte_val(pte);
+
+	ALT_SVNAPOT_PTE_PFN(_val, _PAGE_NAPOT_SHIFT,
+			    _PAGE_PFN_MASK, _PAGE_PFN_SHIFT);
+	return _val;
 }
 
 #define pte_page(x)     pfn_to_page(pte_pfn(x))
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 0be8a2403212..d2a61122c595 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -96,6 +96,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
 	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
 	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
 	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
+	__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
 	__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
 };
 
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 553d755483ed..8cf52f0c5f1a 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -204,6 +204,7 @@ void __init riscv_fill_hwcap(void)
 				SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
 				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
 				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
+				SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT);
 			}
 #undef SET_ISA_EXT_MAP
 		}
@@ -284,6 +285,20 @@ static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage)
 	return false;
 }
 
+static bool __init_or_module cpufeature_probe_svnapot(unsigned int stage)
+{
+#ifdef CONFIG_SVNAPOT
+	switch (stage) {
+	case RISCV_ALTERNATIVES_EARLY_BOOT:
+		return false;
+	default:
+		return riscv_isa_extension_available(NULL, SVNAPOT);
+	}
+#endif
+
+	return false;
+}
+
 /*
  * Probe presence of individual extensions.
  *
@@ -301,6 +316,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
 	if (cpufeature_probe_zicbom(stage))
 		cpu_req_feature |= (1U << CPUFEATURE_ZICBOM);
 
+	if (cpufeature_probe_svnapot(stage))
+		cpu_req_feature |= (1U << CPUFEATURE_SVNAPOT);
+
 	return cpu_req_feature;
 }
 
-- 
2.35.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v4 2/4] mm: support Svnapot in physical page linear-mapping
  2022-08-22 15:34 [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime panqinglin2020
  2022-08-22 15:34 ` [PATCH v4 1/4] mm: modify pte format for Svnapot panqinglin2020
@ 2022-08-22 15:34 ` panqinglin2020
  2022-08-22 21:03   ` Conor.Dooley
  2022-08-22 15:34 ` [PATCH v4 3/4] mm: support Svnapot in hugetlb page panqinglin2020
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 13+ messages in thread
From: panqinglin2020 @ 2022-08-22 15:34 UTC (permalink / raw)
  To: palmer, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan

From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

Svnapot is powerful when a physical region is going to be mapped to a
virtual region. The kernel does this when mapping all allocable
physical pages into the kernel vm space. This commit modifies the
create_pte_mapping function used in the linear-mapping procedure, so the
kernel is able to use Svnapot when both the address and the length of a
physical region are 64KB aligned. Code here is executed only when
other huge page sizes are not suitable, so it can be an addition to the
PMD_SIZE and PUD_SIZE mappings.

This commit also modifies the best_map_size function to compute map_size
multiple times instead of only once, so a memory region can be mapped by
both PMD_SIZE and the 64KB napot size.

It is tested by setting qemu's memory to a 262272k region, and the
kernel can boot successfully.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>

diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index cedcf8ea3c76..395fdc922e9e 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -25,6 +25,7 @@ typedef struct {
 
 void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
 			       phys_addr_t sz, pgprot_t prot);
+void __init create_linear_mapping(void);
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_MMU_H */
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 95ef6e2bf45c..37e6f7044ef1 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -292,13 +292,16 @@ void __init setup_arch(char **cmdline_p)
 	kasan_init();
 #endif
 
-#ifdef CONFIG_SMP
-	setup_smp();
-#endif
-
 	riscv_fill_hwcap();
 	riscv_init_cbom_blocksize();
 	apply_boot_alternatives();
+
+	if (has_svnapot())
+		create_linear_mapping();
+
+#ifdef CONFIG_SMP
+	setup_smp();
+#endif
 }
 
 static int __init topology_init(void)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b56a0a75533f..f54c3991f9ab 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -373,9 +373,21 @@ static void __init create_pte_mapping(pte_t *ptep,
 				      phys_addr_t sz, pgprot_t prot)
 {
 	uintptr_t pte_idx = pte_index(va);
+#ifdef CONFIG_SVNAPOT
+	pte_t pte;
+
+	if (has_svnapot() && sz == NAPOT_CONT64KB_SIZE) {
+		do {
+			pte = pfn_pte(PFN_DOWN(pa), prot);
+			ptep[pte_idx] = pte_mknapot(pte, NAPOT_CONT64KB_ORDER);
+			pte_idx++;
+			sz -= PAGE_SIZE;
+		} while (sz > 0);
+		return;
+	}
+#endif
 
 	BUG_ON(sz != PAGE_SIZE);
-
 	if (pte_none(ptep[pte_idx]))
 		ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
 }
@@ -673,10 +685,18 @@ void __init create_pgd_mapping(pgd_t *pgdp,
 static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
 {
 	/* Upgrade to PMD_SIZE mappings whenever possible */
-	if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1)))
+	base &= PMD_SIZE - 1;
+	if (!base && size >= PMD_SIZE)
+		return PMD_SIZE;
+
+	if (!has_svnapot())
 		return PAGE_SIZE;
 
-	return PMD_SIZE;
+	base &= NAPOT_CONT64KB_SIZE - 1;
+	if (!base && size >= NAPOT_CONT64KB_SIZE)
+		return NAPOT_CONT64KB_SIZE;
+
+	return PAGE_SIZE;
 }
 
 #ifdef CONFIG_XIP_KERNEL
@@ -1090,18 +1110,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 	pt_ops_set_fixmap();
 }
 
-static void __init setup_vm_final(void)
+void __init create_linear_mapping(void)
 {
 	uintptr_t va, map_size;
 	phys_addr_t pa, start, end;
 	u64 i;
 
-	/* Setup swapper PGD for fixmap */
-	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
-			   __pa_symbol(fixmap_pgd_next),
-			   PGDIR_SIZE, PAGE_TABLE);
-
-	/* Map all memory banks in the linear mapping */
 	for_each_mem_range(i, &start, &end) {
 		if (start >= end)
 			break;
@@ -1111,14 +1125,25 @@ static void __init setup_vm_final(void)
 		if (end >= __pa(PAGE_OFFSET) + memory_limit)
 			end = __pa(PAGE_OFFSET) + memory_limit;
 
-		map_size = best_map_size(start, end - start);
 		for (pa = start; pa < end; pa += map_size) {
 			va = (uintptr_t)__va(pa);
+			map_size = best_map_size(pa, end - pa);
 
 			create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
 					   pgprot_from_va(va));
 		}
 	}
+}
+
+static void __init setup_vm_final(void)
+{
+	/* Setup swapper PGD for fixmap */
+	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
+			   __pa_symbol(fixmap_pgd_next),
+			   PGDIR_SIZE, PAGE_TABLE);
+
+	/* Map all memory banks in the linear mapping */
+	create_linear_mapping();
 
 	/* Map the kernel */
 	if (IS_ENABLED(CONFIG_64BIT))
-- 
2.35.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v4 3/4] mm: support Svnapot in hugetlb page
  2022-08-22 15:34 [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime panqinglin2020
  2022-08-22 15:34 ` [PATCH v4 1/4] mm: modify pte format for Svnapot panqinglin2020
  2022-08-22 15:34 ` [PATCH v4 2/4] mm: support Svnapot in physical page linear-mapping panqinglin2020
@ 2022-08-22 15:34 ` panqinglin2020
  2022-08-22 21:08   ` Conor.Dooley
  2022-08-22 15:34 ` [PATCH v4 4/4] mm: support Svnapot in huge vmap panqinglin2020
  2022-08-22 21:22 ` [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime Conor.Dooley
  4 siblings, 1 reply; 13+ messages in thread
From: panqinglin2020 @ 2022-08-22 15:34 UTC (permalink / raw)
  To: palmer, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan

From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

Svnapot can be used to support 64KB hugetlb page, so it can become a new
option when using hugetlbfs. This commit adds a basic implementation of
hugetlb page, and support 64KB as a size in it by using Svnapot.

For testing, boot the kernel with a command line containing
"default_hugepagesz=64K hugepagesz=64K hugepages=20" and run a simple test like this:

/*
 * Userspace smoke test for 64KB Svnapot hugetlb pages: map one 64KB
 * hugepage, write a marker value at 4KB strides across it, then read
 * the values back and verify them.
 */
int main() {
	void *addr;
	/* One 64KB hugetlb page, backed by the Svnapot-based hstate. */
	addr = mmap(NULL, 64 * 1024, PROT_WRITE | PROT_READ,
			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_64KB, -1, 0);
	if (addr == MAP_FAILED) {
		printf("mmap failed\n");
		return 1;
	}
	printf("back from mmap \n");
	long *ptr = (long *)addr;
	unsigned int i = 0;
	/* 8 * 1024 longs == 64KB; step 512 longs == 4KB (one base page). */
	for(; i < 8 * 1024;i += 512) {
		/* %lp is not a valid printf conversion; use %p for pointers. */
		printf("%p \n", (void *)ptr);
		*ptr = 0xdeafabcd12345678;
		ptr += 512;
	}
	ptr = (long *)addr;
	i = 0;
	for(; i < 8 * 1024;i += 512) {
		if (*ptr != 0xdeafabcd12345678) {
			printf("failed! 0x%lx \n", *ptr);
			break;
		}
		ptr += 512;
	}
	if(i == 8 * 1024)
		printf("simple test passed!\n");
}

And it should be passed.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c43708ae7f38..9aaec147a860 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -43,7 +43,7 @@ config RISCV
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_FRAME_POINTERS
-	select ARCH_WANT_GENERAL_HUGETLB
+	select ARCH_WANT_GENERAL_HUGETLB if !SVNAPOT
 	select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
 	select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
 	select BUILDTIME_TABLE_SORT if MMU
diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index a5c2ca1d1cd8..d315625542c8 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -2,7 +2,35 @@
 #ifndef _ASM_RISCV_HUGETLB_H
 #define _ASM_RISCV_HUGETLB_H
 
-#include <asm-generic/hugetlb.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_SVNAPOT
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
+#define arch_make_huge_pte arch_make_huge_pte
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+void set_huge_pte_at(struct mm_struct *mm,
+		     unsigned long addr, pte_t *ptep, pte_t pte);
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+			    unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+			       unsigned long addr, pte_t *ptep,
+			       pte_t pte, int dirty);
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+void huge_ptep_set_wrprotect(struct mm_struct *mm,
+			     unsigned long addr, pte_t *ptep);
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
+void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+		    pte_t *ptep, unsigned long sz);
+#define set_huge_swap_pte_at riscv_set_huge_swap_pte_at
+void riscv_set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, unsigned long sz);
+#endif /*CONFIG_SVNAPOT*/
+
+#include <asm-generic/hugetlb.h>
+
 #endif /* _ASM_RISCV_HUGETLB_H */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index ac70b0fd9a9a..1ea06476902a 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -17,7 +17,7 @@
 #define PAGE_MASK	(~(PAGE_SIZE - 1))
 
 #ifdef CONFIG_64BIT
-#define HUGE_MAX_HSTATE		2
+#define HUGE_MAX_HSTATE		3
 #else
 #define HUGE_MAX_HSTATE		1
 #endif
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 932dadfdca54..71417f228624 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -2,6 +2,239 @@
 #include <linux/hugetlb.h>
 #include <linux/err.h>
 
+#ifdef CONFIG_SVNAPOT
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+		      struct vm_area_struct *vma,
+		      unsigned long addr,
+		      unsigned long sz)
+{
+	pgd_t *pgdp = pgd_offset(mm, addr);
+	p4d_t *p4dp = p4d_alloc(mm, pgdp, addr);
+	pud_t *pudp = pud_alloc(mm, p4dp, addr);
+	pmd_t *pmdp = pmd_alloc(mm, pudp, addr);
+
+	if (sz == NAPOT_CONT64KB_SIZE) {
+		if (!pmdp)
+			return NULL;
+		WARN_ON(addr & (sz - 1));
+		return pte_alloc_map(mm, pmdp, addr);
+	}
+
+	return NULL;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm,
+		       unsigned long addr,
+		       unsigned long sz)
+{
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+	pmd_t *pmdp;
+	pte_t *ptep = NULL;
+
+	pgdp = pgd_offset(mm, addr);
+	if (!pgd_present(READ_ONCE(*pgdp)))
+		return NULL;
+
+	p4dp = p4d_offset(pgdp, addr);
+	if (!p4d_present(READ_ONCE(*p4dp)))
+		return NULL;
+
+	pudp = pud_offset(p4dp, addr);
+	if (!pud_present(READ_ONCE(*pudp)))
+		return NULL;
+
+	pmdp = pmd_offset(pudp, addr);
+	if (!pmd_present(READ_ONCE(*pmdp)))
+		return NULL;
+
+	if (sz == NAPOT_CONT64KB_SIZE)
+		ptep = pte_offset_kernel(pmdp, (addr & ~NAPOT_CONT64KB_MASK));
+
+	return ptep;
+}
+
+int napot_pte_num(pte_t pte)
+{
+	if (!(pte_val(pte) & NAPOT_64KB_MASK))
+		return NAPOT_64KB_PTE_NUM;
+
+	pr_warn("%s: unrecognized napot pte size 0x%lx\n",
+		__func__, pte_val(pte));
+	return 1;
+}
+
+static pte_t get_clear_flush(struct mm_struct *mm,
+			     unsigned long addr,
+			     pte_t *ptep,
+			     unsigned long pte_num)
+{
+	pte_t orig_pte = huge_ptep_get(ptep);
+	bool valid = pte_val(orig_pte);
+	unsigned long i, saddr = addr;
+
+	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) {
+		pte_t pte = ptep_get_and_clear(mm, addr, ptep);
+
+		if (pte_dirty(pte))
+			orig_pte = pte_mkdirty(orig_pte);
+
+		if (pte_young(pte))
+			orig_pte = pte_mkyoung(orig_pte);
+	}
+
+	if (valid) {
+		struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+
+		flush_tlb_range(&vma, saddr, addr);
+	}
+	return orig_pte;
+}
+
+static void clear_flush(struct mm_struct *mm,
+			unsigned long addr,
+			pte_t *ptep,
+			unsigned long pte_num)
+{
+	struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+	unsigned long i, saddr = addr;
+
+	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
+		pte_clear(mm, addr, ptep);
+
+	flush_tlb_range(&vma, saddr, addr);
+}
+
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
+{
+	if (shift == NAPOT_CONT64KB_SHIFT)
+		entry = pte_mknapot(entry, NAPOT_CONT64KB_SHIFT - PAGE_SHIFT);
+
+	return entry;
+}
+
+void set_huge_pte_at(struct mm_struct *mm,
+		     unsigned long addr,
+		     pte_t *ptep,
+		     pte_t pte)
+{
+	int i;
+	int pte_num;
+
+	if (!pte_napot(pte)) {
+		set_pte_at(mm, addr, ptep, pte);
+		return;
+	}
+
+	pte_num = napot_pte_num(pte);
+	for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE)
+		set_pte_at(mm, addr, ptep, pte);
+}
+
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+			       unsigned long addr,
+			       pte_t *ptep,
+			       pte_t pte,
+			       int dirty)
+{
+	pte_t orig_pte;
+	int i;
+	int pte_num;
+
+	if (!pte_napot(pte))
+		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+
+	pte_num = napot_pte_num(pte);
+	ptep = huge_pte_offset(vma->vm_mm, addr, NAPOT_CONT64KB_SIZE);
+	orig_pte = huge_ptep_get(ptep);
+
+	if (pte_dirty(orig_pte))
+		pte = pte_mkdirty(pte);
+
+	if (pte_young(orig_pte))
+		pte = pte_mkyoung(pte);
+
+	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
+		ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+
+	return true;
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr,
+			      pte_t *ptep)
+{
+	int pte_num;
+	pte_t orig_pte = huge_ptep_get(ptep);
+
+	if (!pte_napot(orig_pte))
+		return ptep_get_and_clear(mm, addr, ptep);
+
+	pte_num = napot_pte_num(orig_pte);
+	return get_clear_flush(mm, addr, ptep, pte_num);
+}
+
+void huge_ptep_set_wrprotect(struct mm_struct *mm,
+			     unsigned long addr,
+			     pte_t *ptep)
+{
+	int i;
+	int pte_num;
+	pte_t pte = READ_ONCE(*ptep);
+
+	if (!pte_napot(pte))
+		return ptep_set_wrprotect(mm, addr, ptep);
+
+	pte_num = napot_pte_num(pte);
+	ptep = huge_pte_offset(mm, addr, NAPOT_CONT64KB_SIZE);
+
+	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
+		ptep_set_wrprotect(mm, addr, ptep);
+}
+
+pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+			    unsigned long addr,
+			    pte_t *ptep)
+{
+	int pte_num;
+	pte_t pte = READ_ONCE(*ptep);
+
+	if (!pte_napot(pte)) {
+		ptep_clear_flush(vma, addr, ptep);
+		return pte;
+	}
+
+	pte_num = napot_pte_num(pte);
+	clear_flush(vma->vm_mm, addr, ptep, pte_num);
+
+	return pte;
+}
+
+void huge_pte_clear(struct mm_struct *mm,
+		    unsigned long addr,
+		    pte_t *ptep,
+		    unsigned long sz)
+{
+	int i, pte_num;
+
+	pte_num = napot_pte_num(READ_ONCE(*ptep));
+	for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
+		pte_clear(mm, addr, ptep);
+}
+
+void riscv_set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, unsigned long sz)
+{
+	int i, pte_num;
+
+	pte_num = napot_pte_num(READ_ONCE(*ptep));
+
+	for (i = 0; i < pte_num; i++, ptep++)
+		set_pte(ptep, pte);
+}
+#endif /*CONFIG_SVNAPOT*/
+
 int pud_huge(pud_t pud)
 {
 	return pud_leaf(pud);
@@ -18,17 +251,26 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
 		return true;
 	else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
 		return true;
+#ifdef CONFIG_SVNAPOT
+	else if (has_svnapot() && size == NAPOT_CONT64KB_SIZE)
+		return true;
+#endif /*CONFIG_SVNAPOT*/
 	else
 		return false;
 }
 
-#ifdef CONFIG_CONTIG_ALLOC
-static __init int gigantic_pages_init(void)
+static __init int hugetlbpage_init(void)
 {
+#ifdef CONFIG_CONTIG_ALLOC
 	/* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */
 	if (IS_ENABLED(CONFIG_64BIT))
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+#endif /*CONFIG_CONTIG_ALLOC*/
+	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
+#ifdef CONFIG_SVNAPOT
+	if (has_svnapot())
+		hugetlb_add_hstate(NAPOT_CONT64KB_SHIFT - PAGE_SHIFT);
+#endif /*CONFIG_SVNAPOT*/
 	return 0;
 }
-arch_initcall(gigantic_pages_init);
-#endif
+arch_initcall(hugetlbpage_init);
-- 
2.35.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH v4 4/4] mm: support Svnapot in huge vmap
  2022-08-22 15:34 [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime panqinglin2020
                   ` (2 preceding siblings ...)
  2022-08-22 15:34 ` [PATCH v4 3/4] mm: support Svnapot in hugetlb page panqinglin2020
@ 2022-08-22 15:34 ` panqinglin2020
  2022-08-22 21:13   ` Conor.Dooley
  2022-08-22 21:22 ` [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime Conor.Dooley
  4 siblings, 1 reply; 13+ messages in thread
From: panqinglin2020 @ 2022-08-22 15:34 UTC (permalink / raw)
  To: palmer, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan

From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

The HAVE_ARCH_HUGE_VMAP option can be used to help implement arch-specific
huge vmap sizes. This commit selects this option by default and
re-writes arch_vmap_pte_range_map_size for the Svnapot 64KB size.

It can be tested by booting the kernel in qemu with a pci device, which
makes the kernel call the pci driver using ioremap, so that the
re-written function is called.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 9aaec147a860..a420325a24ac 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -70,6 +70,7 @@ config RISCV
 	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
 	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
 	select HAVE_ARCH_KASAN if MMU && 64BIT
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 37547dd04010..6d5caa1a6bd9 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -750,6 +750,43 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+	return 0;
+}
+
+static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+	return 0;
+}
+
+static inline void p4d_clear_huge(p4d_t *p4d) { }
+
+static inline int pud_clear_huge(pud_t *pud)
+{
+	return 0;
+}
+
+static inline int pmd_clear_huge(pmd_t *pmd)
+{
+	return 0;
+}
+
+static inline int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
+{
+	return 0;
+}
+
+static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+{
+	return 0;
+}
+
+static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+{
+	return 0;
+}
+
 /*
  * Encode and decode a swap entry
  *
diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h
index ff9abc00d139..d92880fbfcde 100644
--- a/arch/riscv/include/asm/vmalloc.h
+++ b/arch/riscv/include/asm/vmalloc.h
@@ -1,4 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 #ifndef _ASM_RISCV_VMALLOC_H
 #define _ASM_RISCV_VMALLOC_H
 
+#include <linux/pgtable.h>
+
+#ifdef CONFIG_SVNAPOT
+#define arch_vmap_pte_range_map_size vmap_pte_range_map_size
+static inline unsigned long vmap_pte_range_map_size(unsigned long addr,
+						    unsigned long end,
+						    u64 pfn,
+						    unsigned int max_page_shift)
+{
+	bool is_napot_addr = !(addr & NAPOT_CONT64KB_MASK);
+	bool pfn_align_napot = !(pfn & (NAPOT_64KB_PTE_NUM - 1UL));
+	bool space_enough = ((end - addr) >= NAPOT_CONT64KB_SIZE);
+
+	if (has_svnapot() && is_napot_addr && pfn_align_napot &&
+	    space_enough && max_page_shift >= NAPOT_CONT64KB_SHIFT)
+		return NAPOT_CONT64KB_SIZE;
+
+	return PAGE_SIZE;
+}
+#endif /*CONFIG_SVNAPOT*/
+
 #endif /* _ASM_RISCV_VMALLOC_H */
-- 
2.35.1


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH v4 1/4] mm: modify pte format for Svnapot
  2022-08-22 15:34 ` [PATCH v4 1/4] mm: modify pte format for Svnapot panqinglin2020
@ 2022-08-22 20:45   ` Conor.Dooley
  2022-08-22 20:56   ` Conor.Dooley
  2022-08-24 17:37   ` Heiko Stübner
  2 siblings, 0 replies; 13+ messages in thread
From: Conor.Dooley @ 2022-08-22 20:45 UTC (permalink / raw)
  To: panqinglin2020, palmer, linux-riscv; +Cc: jeff, xuyinan

Hey Qinglin Pan,
Couple comments below.

On 22/08/2022 16:34, panqinglin2020@iscas.ac.cn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> This commit adds two errata to enable/disable svnapot support, patching code
> dynamically when "svnapot" is in the "riscv,isa" field of the fdt and the
> SVNAPOT compile option is set. It will influence the behavior of the
> has_svnapot function and the pte_pfn function. All code dependent on svnapot
> should make sure that has_svnapot returns true first.
> 
> Also, this commit modifies PTE definition for Svnapot, and creates some
> functions in pgtable.h to mark a PTE as napot and check if it is a Svnapot
> PTE. Until now, only the 64KB napot size is supported in the draft spec, so
> some macros have only a 64KB version.
> 
> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 



> diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
> index dc42375c2357..a23b71cf5979 100644
> --- a/arch/riscv/include/asm/pgtable-64.h
> +++ b/arch/riscv/include/asm/pgtable-64.h
> @@ -74,6 +74,20 @@ typedef struct {
>   */
>  #define _PAGE_PFN_MASK  GENMASK(53, 10)
> 
> +/*
> + * [63] Svnapot definitions:
> + * 0 Svnapot disabled
> + * 1 Svnapot enabled
> + */
> +#define _PAGE_NAPOT_SHIFT 63
> +#define _PAGE_NAPOT      (1UL << _PAGE_NAPOT_SHIFT)

Is there any reason not to just make this BIT(_PAGE_NAPOT_SHIFT)?

> +#define NAPOT_CONT64KB_ORDER 4UL
> +#define NAPOT_CONT64KB_SHIFT (NAPOT_CONT64KB_ORDER + PAGE_SHIFT)
> +#define NAPOT_CONT64KB_SIZE (1UL << NAPOT_CONT64KB_SHIFT)

Ditto here, BIT(NAPOT_CONT64KB_SHIFT)?

> +#define NAPOT_CONT64KB_MASK (NAPOT_CONT64KB_SIZE - 1)

GENMASK(NAPOT_CONT64KB_SHIFT - 1, 0) no?

> +#define NAPOT_64KB_PTE_NUM (1UL << NAPOT_CONT64KB_ORDER)

BIT(NAPOT_CONT64KB_ORDER)?

> +#define NAPOT_64KB_MASK (7UL << _PAGE_PFN_SHIFT)

GENMASK() here too maybe? But not sure if it adds to readability.

> +
>  /*
>   * [62:61] Svpbmt Memory Type definitions:
>   *
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 7ec936910a96..37547dd04010 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -264,10 +264,41 @@ static inline pte_t pud_pte(pud_t pud)
>         return __pte(pud_val(pud));
>  }
> 
> +static inline bool has_svnapot(void)
> +{
> +       u64 _val;
> +
> +       ALT_SVNAPOT(_val);
> +       return _val;
> +}
> +
> +#ifdef CONFIG_SVNAPOT
> +
> +static inline unsigned long pte_napot(pte_t pte)
> +{
> +       return pte_val(pte) & _PAGE_NAPOT;
> +}
> +
> +static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
> +{
> +       unsigned long napot_bits = (1UL << (order - 1)) << _PAGE_PFN_SHIFT;

I would probably prefer to write this as BIT(order - 1) << _PAGE_PFN_SHIFT

> +       unsigned long lower_prot =
> +               pte_val(pte) & ((1UL << _PAGE_PFN_SHIFT) - 1UL);

pte_val(pte) & GENMASK(_PAGE_PFN_SHIFT - 1, 0)?

> +       unsigned long upper_prot = (pte_val(pte) >> _PAGE_PFN_SHIFT)
> +                                  << _PAGE_PFN_SHIFT;

Why are you shifting this down & then back up again? Could you not just
reuse the negated mask here so the zeroing looks more intentional?

> +
> +       return __pte(upper_prot | napot_bits | lower_prot | _PAGE_NAPOT);
> +}
> +#endif /* CONFIG_SVNAPOT */
> +
>  /* Yields the page frame number (PFN) of a page table entry */
>  static inline unsigned long pte_pfn(pte_t pte)
>  {
> -       return __page_val_to_pfn(pte_val(pte));
> +       unsigned long _val  = pte_val(pte);
> +
> +       ALT_SVNAPOT_PTE_PFN(_val, _PAGE_NAPOT_SHIFT,
> +                           _PAGE_PFN_MASK, _PAGE_PFN_SHIFT);
> +       return _val;
>  }
> 
>  #define pte_page(x)     pfn_to_page(pte_pfn(x))
> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> index 0be8a2403212..d2a61122c595 100644
> --- a/arch/riscv/kernel/cpu.c
> +++ b/arch/riscv/kernel/cpu.c
> @@ -96,6 +96,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
>         __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
>         __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
>         __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
> +       __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
>         __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
>  };
> 
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 553d755483ed..8cf52f0c5f1a 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -204,6 +204,7 @@ void __init riscv_fill_hwcap(void)
>                                 SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
>                                 SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
>                                 SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
> +                               SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT);
>                         }
>  #undef SET_ISA_EXT_MAP
>                 }
> @@ -284,6 +285,20 @@ static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage)
>         return false;
>  }
> 
> +static bool __init_or_module cpufeature_probe_svnapot(unsigned int stage)
> +{
> +#ifdef CONFIG_SVNAPOT
> +       switch (stage) {
> +       case RISCV_ALTERNATIVES_EARLY_BOOT:
> +               return false;
> +       default:
> +               return riscv_isa_extension_available(NULL, SVNAPOT);
> +       }
> +#endif
> +
> +       return false;
> +}
> +
>  /*
>   * Probe presence of individual extensions.
>   *
> @@ -301,6 +316,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
>         if (cpufeature_probe_zicbom(stage))
>                 cpu_req_feature |= (1U << CPUFEATURE_ZICBOM);

I wonder why BIT wasn't used here either?

> 
> +       if (cpufeature_probe_svnapot(stage))
> +               cpu_req_feature |= (1U << CPUFEATURE_SVNAPOT);

Could use it here too..

Using BIT() here looks like a no-brainer, have I missed something?
Thanks,
Conor.

> +
>         return cpu_req_feature;
>  }
> 
> --
> 2.35.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4 1/4] mm: modify pte format for Svnapot
  2022-08-22 15:34 ` [PATCH v4 1/4] mm: modify pte format for Svnapot panqinglin2020
  2022-08-22 20:45   ` Conor.Dooley
@ 2022-08-22 20:56   ` Conor.Dooley
  2022-08-24 17:37   ` Heiko Stübner
  2 siblings, 0 replies; 13+ messages in thread
From: Conor.Dooley @ 2022-08-22 20:56 UTC (permalink / raw)
  To: panqinglin2020, palmer, linux-riscv; +Cc: jeff, xuyinan

On 22/08/2022 16:34, panqinglin2020@iscas.ac.cn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> This commit adds two errata to enable/disable svnapot support, patching code
> dynamically when "svnapot" is in the "riscv,isa" field of the fdt and the
> SVNAPOT compile option is set. It will influence the behavior of the
> has_svnapot function and the pte_pfn function. All code dependent on svnapot
> should make sure that has_svnapot returns true first.
> 
> Also, this commit modifies PTE definition for Svnapot, and creates some
> functions in pgtable.h to mark a PTE as napot and check if it is a Svnapot
> PTE. Until now, only the 64KB napot size is supported in the draft spec, so
> some macros have only a 64KB version.
> 
> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index ed66c31e4655..c43708ae7f38 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -432,6 +432,13 @@ config FPU
> 
>           If you don't know what to do here, say Y.
> 
> +config SVNAPOT

One more, CONFIG_RISCV_ISA_SVNAPOT to match the others?

> +       bool "Svnapot support"
> +       default n
> +       help
> +         Select if your CPU supports Svnapot and you want to enable it when
> +         kernel is booting.
> +
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4 2/4] mm: support Svnapot in physical page linear-mapping
  2022-08-22 15:34 ` [PATCH v4 2/4] mm: support Svnapot in physical page linear-mapping panqinglin2020
@ 2022-08-22 21:03   ` Conor.Dooley
  0 siblings, 0 replies; 13+ messages in thread
From: Conor.Dooley @ 2022-08-22 21:03 UTC (permalink / raw)
  To: panqinglin2020, palmer, linux-riscv; +Cc: jeff, xuyinan

Hey, 
Couple questions about some things that caught my eye.
Mostly beyond my pay grade here though...

On 22/08/2022 16:34, panqinglin2020@iscas.ac.cn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> Svnapot is powerful when a physical region is going to be mapped to a
> virtual region. The kernel does this when mapping all allocatable
> physical pages to the kernel vm space. This commit modifies the
> create_pte_mapping function used in the linear-mapping procedure, so the
> kernel is able to use Svnapot when both the address and length of a
> physical region are 64KB-aligned. Code here will be executed only when
> other huge page sizes are not suitable, so it can be an addition to the
> PMD_SIZE and PUD_SIZE mappings.
> 
> This commit also modifies the best_map_size function to give map_size
> many times instead of only once, so a memory region can be mapped by
> both PMD_SIZE and 64KB napot size.
> 
> It is tested by setting qemu's memory to a 262272k region, and the
> kernel can boot successfully.
> 
> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
> index cedcf8ea3c76..395fdc922e9e 100644
> --- a/arch/riscv/include/asm/mmu.h
> +++ b/arch/riscv/include/asm/mmu.h
> @@ -25,6 +25,7 @@ typedef struct {
> 
>  void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa,
>                                phys_addr_t sz, pgprot_t prot);
> +void __init create_linear_mapping(void);
>  #endif /* __ASSEMBLY__ */
> 
>  #endif /* _ASM_RISCV_MMU_H */
> diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
> index 95ef6e2bf45c..37e6f7044ef1 100644
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -292,13 +292,16 @@ void __init setup_arch(char **cmdline_p)
>         kasan_init();
>  #endif
> 
> -#ifdef CONFIG_SMP
> -       setup_smp();
> -#endif
> -
>         riscv_fill_hwcap();
>         riscv_init_cbom_blocksize();
>         apply_boot_alternatives();
> +
> +       if (has_svnapot())
> +               create_linear_mapping();

Does this now get called twice if has_svnapot()?
Once here and once in paging_init().

> +
> +#ifdef CONFIG_SMP
> +       setup_smp();
> +#endif

Are there any side effects to moving this down?

>  }
> 
>  static int __init topology_init(void)
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index b56a0a75533f..f54c3991f9ab 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -373,9 +373,21 @@ static void __init create_pte_mapping(pte_t *ptep,
>                                       phys_addr_t sz, pgprot_t prot)
>  {
>         uintptr_t pte_idx = pte_index(va);
> +#ifdef CONFIG_SVNAPOT
> +       pte_t pte;
> +
> +       if (has_svnapot() && sz == NAPOT_CONT64KB_SIZE) {
> +               do {
> +                       pte = pfn_pte(PFN_DOWN(pa), prot);
> +                       ptep[pte_idx] = pte_mknapot(pte, NAPOT_CONT64KB_ORDER);
> +                       pte_idx++;
> +                       sz -= PAGE_SIZE;
> +               } while (sz > 0);
> +               return;
> +       }
> +#endif
> 
>         BUG_ON(sz != PAGE_SIZE);
> -
>         if (pte_none(ptep[pte_idx]))
>                 ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot);
>  }
> @@ -673,10 +685,18 @@ void __init create_pgd_mapping(pgd_t *pgdp,
>  static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
>  {
>         /* Upgrade to PMD_SIZE mappings whenever possible */
> -       if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1)))
> +       base &= PMD_SIZE - 1;
> +       if (!base && size >= PMD_SIZE)
> +               return PMD_SIZE;
> +
> +       if (!has_svnapot())
>                 return PAGE_SIZE;
> 
> -       return PMD_SIZE;
> +       base &= NAPOT_CONT64KB_SIZE - 1;
> +       if (!base && size >= NAPOT_CONT64KB_SIZE)
> +               return NAPOT_CONT64KB_SIZE;
> +
> +       return PAGE_SIZE;
>  }
> 
>  #ifdef CONFIG_XIP_KERNEL
> @@ -1090,18 +1110,12 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>         pt_ops_set_fixmap();
>  }
> 
> -static void __init setup_vm_final(void)
> +void __init create_linear_mapping(void)
>  {
>         uintptr_t va, map_size;
>         phys_addr_t pa, start, end;
>         u64 i;
> 
> -       /* Setup swapper PGD for fixmap */
> -       create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
> -                          __pa_symbol(fixmap_pgd_next),
> -                          PGDIR_SIZE, PAGE_TABLE);
> -
> -       /* Map all memory banks in the linear mapping */
>         for_each_mem_range(i, &start, &end) {
>                 if (start >= end)
>                         break;
> @@ -1111,14 +1125,25 @@ static void __init setup_vm_final(void)
>                 if (end >= __pa(PAGE_OFFSET) + memory_limit)
>                         end = __pa(PAGE_OFFSET) + memory_limit;
> 
> -               map_size = best_map_size(start, end - start);
>                 for (pa = start; pa < end; pa += map_size) {
>                         va = (uintptr_t)__va(pa);
> +                       map_size = best_map_size(pa, end - pa);
> 
>                         create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
>                                            pgprot_from_va(va));
>                 }
>         }
> +}
> +
> +static void __init setup_vm_final(void)
> +{
> +       /* Setup swapper PGD for fixmap */
> +       create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
> +                          __pa_symbol(fixmap_pgd_next),
> +                          PGDIR_SIZE, PAGE_TABLE);
> +
> +       /* Map all memory banks in the linear mapping */
> +       create_linear_mapping();
> 
>         /* Map the kernel */
>         if (IS_ENABLED(CONFIG_64BIT))
> --
> 2.35.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4 3/4] mm: support Svnapot in hugetlb page
  2022-08-22 15:34 ` [PATCH v4 3/4] mm: support Svnapot in hugetlb page panqinglin2020
@ 2022-08-22 21:08   ` Conor.Dooley
  0 siblings, 0 replies; 13+ messages in thread
From: Conor.Dooley @ 2022-08-22 21:08 UTC (permalink / raw)
  To: panqinglin2020, palmer, linux-riscv; +Cc: jeff, xuyinan

Hey,
On 22/08/2022 16:34, panqinglin2020@iscas.ac.cn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> Svnapot can be used to support 64KB hugetlb pages, so it can become a new
> option when using hugetlbfs. This commit adds a basic implementation of
> hugetlb pages, and supports 64KB as a page size in it by using Svnapot.
> 
> For test, boot kernel with command line contains "default_hugepagesz=64K
> hugepagesz=64K hugepages=20" and run a simple test like this:
> 
> int main() {
>         void *addr;
>         addr = mmap(NULL, 64 * 1024, PROT_WRITE | PROT_READ,
>                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_64KB, -1, 0);
>         printf("back from mmap \n");
>         long *ptr = (long *)addr;
>         unsigned int i = 0;
>         for(; i < 8 * 1024;i += 512) {
>                 printf("%lp \n", ptr);
>                 *ptr = 0xdeafabcd12345678;
>                 ptr += 512;
>         }
>         ptr = (long *)addr;
>         i = 0;
>         for(; i < 8 * 1024;i += 512) {
>                 if (*ptr != 0xdeafabcd12345678) {
>                         printf("failed! 0x%lx \n", *ptr);
>                         break;
>                 }
>                 ptr += 512;
>         }
>         if(i == 8 * 1024)
>                 printf("simple test passed!\n");
> }
> 
> And it should be passed.

Actually build-tested this version, which I didn't get a 
chance to do earlier. Got a couple of new warnings:

/stuff/linux/arch/riscv/mm/hugetlbpage.c:58:5: warning: no previous prototype for function 'napot_pte_num' [-Wmissing-prototypes]
int napot_pte_num(pte_t pte)
    ^
/stuff/linux/arch/riscv/mm/hugetlbpage.c:58:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int napot_pte_num(pte_t pte)

> 
> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index c43708ae7f38..9aaec147a860 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -43,7 +43,7 @@ config RISCV
>         select ARCH_USE_QUEUED_RWLOCKS
>         select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
>         select ARCH_WANT_FRAME_POINTERS
> -       select ARCH_WANT_GENERAL_HUGETLB
> +       select ARCH_WANT_GENERAL_HUGETLB if !SVNAPOT
>         select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
>         select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
>         select BUILDTIME_TABLE_SORT if MMU
> diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
> index a5c2ca1d1cd8..d315625542c8 100644
> --- a/arch/riscv/include/asm/hugetlb.h
> +++ b/arch/riscv/include/asm/hugetlb.h
> @@ -2,7 +2,35 @@
>  #ifndef _ASM_RISCV_HUGETLB_H
>  #define _ASM_RISCV_HUGETLB_H
> 
> -#include <asm-generic/hugetlb.h>
>  #include <asm/page.h>
> 
> +#ifdef CONFIG_SVNAPOT
> +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
> +#define arch_make_huge_pte arch_make_huge_pte
> +#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
> +void set_huge_pte_at(struct mm_struct *mm,
> +                    unsigned long addr, pte_t *ptep, pte_t pte);
> +#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
> +pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
> +                             unsigned long addr, pte_t *ptep);
> +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
> +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
> +                           unsigned long addr, pte_t *ptep);
> +#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
> +int huge_ptep_set_access_flags(struct vm_area_struct *vma,
> +                              unsigned long addr, pte_t *ptep,
> +                              pte_t pte, int dirty);
> +#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
> +void huge_ptep_set_wrprotect(struct mm_struct *mm,
> +                            unsigned long addr, pte_t *ptep);
> +#define __HAVE_ARCH_HUGE_PTE_CLEAR
> +void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
> +                   pte_t *ptep, unsigned long sz);
> +#define set_huge_swap_pte_at riscv_set_huge_swap_pte_at
> +void riscv_set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
> +                               pte_t *ptep, pte_t pte, unsigned long sz);
> +#endif /*CONFIG_SVNAPOT*/
> +
> +#include <asm-generic/hugetlb.h>
> +
>  #endif /* _ASM_RISCV_HUGETLB_H */
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index ac70b0fd9a9a..1ea06476902a 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -17,7 +17,7 @@
>  #define PAGE_MASK      (~(PAGE_SIZE - 1))
> 
>  #ifdef CONFIG_64BIT
> -#define HUGE_MAX_HSTATE                2
> +#define HUGE_MAX_HSTATE                3
>  #else
>  #define HUGE_MAX_HSTATE                1
>  #endif
> diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
> index 932dadfdca54..71417f228624 100644
> --- a/arch/riscv/mm/hugetlbpage.c
> +++ b/arch/riscv/mm/hugetlbpage.c
> @@ -2,6 +2,239 @@
>  #include <linux/hugetlb.h>
>  #include <linux/err.h>
> 
> +#ifdef CONFIG_SVNAPOT
> +pte_t *huge_pte_alloc(struct mm_struct *mm,
> +                     struct vm_area_struct *vma,
> +                     unsigned long addr,
> +                     unsigned long sz)
> +{
> +       pgd_t *pgdp = pgd_offset(mm, addr);
> +       p4d_t *p4dp = p4d_alloc(mm, pgdp, addr);
> +       pud_t *pudp = pud_alloc(mm, p4dp, addr);
> +       pmd_t *pmdp = pmd_alloc(mm, pudp, addr);
> +
> +       if (sz == NAPOT_CONT64KB_SIZE) {
> +               if (!pmdp)
> +                       return NULL;
> +               WARN_ON(addr & (sz - 1));
> +               return pte_alloc_map(mm, pmdp, addr);
> +       }
> +
> +       return NULL;
> +}
> +
> +pte_t *huge_pte_offset(struct mm_struct *mm,
> +                      unsigned long addr,
> +                      unsigned long sz)
> +{
> +       pgd_t *pgdp;
> +       p4d_t *p4dp;
> +       pud_t *pudp;
> +       pmd_t *pmdp;
> +       pte_t *ptep = NULL;
> +
> +       pgdp = pgd_offset(mm, addr);
> +       if (!pgd_present(READ_ONCE(*pgdp)))
> +               return NULL;
> +
> +       p4dp = p4d_offset(pgdp, addr);
> +       if (!p4d_present(READ_ONCE(*p4dp)))
> +               return NULL;
> +
> +       pudp = pud_offset(p4dp, addr);
> +       if (!pud_present(READ_ONCE(*pudp)))
> +               return NULL;
> +
> +       pmdp = pmd_offset(pudp, addr);
> +       if (!pmd_present(READ_ONCE(*pmdp)))
> +               return NULL;
> +
> +       if (sz == NAPOT_CONT64KB_SIZE)
> +               ptep = pte_offset_kernel(pmdp, (addr & ~NAPOT_CONT64KB_MASK));
> +
> +       return ptep;
> +}
> +
> +int napot_pte_num(pte_t pte)
> +{
> +       if (!(pte_val(pte) & NAPOT_64KB_MASK))
> +               return NAPOT_64KB_PTE_NUM;
> +
> +       pr_warn("%s: unrecognized napot pte size 0x%lx\n",
> +               __func__, pte_val(pte));
> +       return 1;
> +}
> +
> +static pte_t get_clear_flush(struct mm_struct *mm,
> +                            unsigned long addr,
> +                            pte_t *ptep,
> +                            unsigned long pte_num)
> +{
> +       pte_t orig_pte = huge_ptep_get(ptep);
> +       bool valid = pte_val(orig_pte);
> +       unsigned long i, saddr = addr;
> +
> +       for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) {
> +               pte_t pte = ptep_get_and_clear(mm, addr, ptep);
> +
> +               if (pte_dirty(pte))
> +                       orig_pte = pte_mkdirty(orig_pte);
> +
> +               if (pte_young(pte))
> +                       orig_pte = pte_mkyoung(orig_pte);
> +       }
> +
> +       if (valid) {
> +               struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
> +
> +               flush_tlb_range(&vma, saddr, addr);
> +       }
> +       return orig_pte;
> +}
> +
> +static void clear_flush(struct mm_struct *mm,
> +                       unsigned long addr,
> +                       pte_t *ptep,
> +                       unsigned long pte_num)
> +{
> +       struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
> +       unsigned long i, saddr = addr;
> +
> +       for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
> +               pte_clear(mm, addr, ptep);
> +
> +       flush_tlb_range(&vma, saddr, addr);
> +}
> +
> +pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
> +{
> +       if (shift == NAPOT_CONT64KB_SHIFT)
> +               entry = pte_mknapot(entry, NAPOT_CONT64KB_SHIFT - PAGE_SHIFT);
> +
> +       return entry;
> +}
> +
> +void set_huge_pte_at(struct mm_struct *mm,
> +                    unsigned long addr,
> +                    pte_t *ptep,
> +                    pte_t pte)
> +{
> +       int i;
> +       int pte_num;
> +
> +       if (!pte_napot(pte)) {
> +               set_pte_at(mm, addr, ptep, pte);
> +               return;
> +       }
> +
> +       pte_num = napot_pte_num(pte);
> +       for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE)
> +               set_pte_at(mm, addr, ptep, pte);
> +}
> +
> +int huge_ptep_set_access_flags(struct vm_area_struct *vma,
> +                              unsigned long addr,
> +                              pte_t *ptep,
> +                              pte_t pte,
> +                              int dirty)
> +{
> +       pte_t orig_pte;
> +       int i;
> +       int pte_num;
> +
> +       if (!pte_napot(pte))
> +               return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
> +
> +       pte_num = napot_pte_num(pte);
> +       ptep = huge_pte_offset(vma->vm_mm, addr, NAPOT_CONT64KB_SIZE);
> +       orig_pte = huge_ptep_get(ptep);
> +
> +       if (pte_dirty(orig_pte))
> +               pte = pte_mkdirty(pte);
> +
> +       if (pte_young(orig_pte))
> +               pte = pte_mkyoung(pte);
> +
> +       for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
> +               ptep_set_access_flags(vma, addr, ptep, pte, dirty);
> +
> +       return true;
> +}
> +
> +pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
> +                             unsigned long addr,
> +                             pte_t *ptep)
> +{
> +       int pte_num;
> +       pte_t orig_pte = huge_ptep_get(ptep);
> +
> +       if (!pte_napot(orig_pte))
> +               return ptep_get_and_clear(mm, addr, ptep);
> +
> +       pte_num = napot_pte_num(orig_pte);
> +       return get_clear_flush(mm, addr, ptep, pte_num);
> +}
> +
> +void huge_ptep_set_wrprotect(struct mm_struct *mm,
> +                            unsigned long addr,
> +                            pte_t *ptep)
> +{
> +       int i;
> +       int pte_num;
> +       pte_t pte = READ_ONCE(*ptep);
> +
> +       if (!pte_napot(pte))
> +               return ptep_set_wrprotect(mm, addr, ptep);
> +
> +       pte_num = napot_pte_num(pte);
> +       ptep = huge_pte_offset(mm, addr, NAPOT_CONT64KB_SIZE);
> +
> +       for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
> +               ptep_set_wrprotect(mm, addr, ptep);
> +}
> +
> +pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
> +                           unsigned long addr,
> +                           pte_t *ptep)
> +{
> +       int pte_num;
> +       pte_t pte = READ_ONCE(*ptep);
> +
> +       if (!pte_napot(pte)) {
> +               ptep_clear_flush(vma, addr, ptep);
> +               return pte;
> +       }
> +
> +       pte_num = napot_pte_num(pte);
> +       clear_flush(vma->vm_mm, addr, ptep, pte_num);
> +
> +       return pte;
> +}
> +
> +void huge_pte_clear(struct mm_struct *mm,
> +                   unsigned long addr,
> +                   pte_t *ptep,
> +                   unsigned long sz)
> +{
> +       int i, pte_num;
> +
> +       pte_num = napot_pte_num(READ_ONCE(*ptep));
> +       for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
> +               pte_clear(mm, addr, ptep);
> +}
> +
> +void riscv_set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
> +                               pte_t *ptep, pte_t pte, unsigned long sz)
> +{
> +       int i, pte_num;
> +
> +       pte_num = napot_pte_num(READ_ONCE(*ptep));
> +
> +       for (i = 0; i < pte_num; i++, ptep++)
> +               set_pte(ptep, pte);
> +}
> +#endif /*CONFIG_SVNAPOT*/
> +
>  int pud_huge(pud_t pud)
>  {
>         return pud_leaf(pud);
> @@ -18,17 +251,26 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
>                 return true;
>         else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
>                 return true;
> +#ifdef CONFIG_SVNAPOT
> +       else if (has_svnapot() && size == NAPOT_CONT64KB_SIZE)
> +               return true;
> +#endif /*CONFIG_SVNAPOT*/
>         else
>                 return false;
>  }
> 
> -#ifdef CONFIG_CONTIG_ALLOC
> -static __init int gigantic_pages_init(void)
> +static __init int hugetlbpage_init(void)
>  {
> +#ifdef CONFIG_CONTIG_ALLOC
>         /* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */
>         if (IS_ENABLED(CONFIG_64BIT))
>                 hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
> +#endif /*CONFIG_CONTIG_ALLOC*/
> +       hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
> +#ifdef CONFIG_SVNAPOT
> +       if (has_svnapot())
> +               hugetlb_add_hstate(NAPOT_CONT64KB_SHIFT - PAGE_SHIFT);
> +#endif /*CONFIG_SVNAPOT*/
>         return 0;
>  }
> -arch_initcall(gigantic_pages_init);
> -#endif
> +arch_initcall(hugetlbpage_init);
> --
> 2.35.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4 4/4] mm: support Svnapot in huge vmap
  2022-08-22 15:34 ` [PATCH v4 4/4] mm: support Svnapot in huge vmap panqinglin2020
@ 2022-08-22 21:13   ` Conor.Dooley
  0 siblings, 0 replies; 13+ messages in thread
From: Conor.Dooley @ 2022-08-22 21:13 UTC (permalink / raw)
  To: panqinglin2020, palmer, linux-riscv; +Cc: jeff, xuyinan

On 22/08/2022 16:34, panqinglin2020@iscas.ac.cn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> The HAVE_ARCH_HUGE_VMAP option can be used to help implement arch
> special huge vmap size. This commit selects this option by default and
> re-writes the arch_vmap_pte_range_map_size for Svnapot 64KB size.
> 
> It can be tested when booting kernel in qemu with pci device, which
> will make the kernel to call pci driver using ioremap, and the
> re-written function will be called.
> 
> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 9aaec147a860..a420325a24ac 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -70,6 +70,7 @@ config RISCV
>         select GENERIC_TIME_VSYSCALL if MMU && 64BIT
>         select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
>         select HAVE_ARCH_AUDITSYSCALL
> +       select HAVE_ARCH_HUGE_VMAP
>         select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
>         select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
>         select HAVE_ARCH_KASAN if MMU && 64BIT
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 37547dd04010..6d5caa1a6bd9 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -750,6 +750,43 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
>  }
>  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
> 
> +static inline int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
> +{
> +       return 0;
> +}
> +
> +static inline int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
> +{
> +       return 0;
> +}
> +
> +static inline void p4d_clear_huge(p4d_t *p4d) { }
> +
> +static inline int pud_clear_huge(pud_t *pud)
> +{
> +       return 0;
> +}
> +
> +static inline int pmd_clear_huge(pmd_t *pmd)
> +{
> +       return 0;
> +}
> +
> +static inline int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
> +{
> +       return 0;
> +}
> +
> +static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr)
> +{
> +       return 0;
> +}
> +
> +static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
> +{
> +       return 0;
> +}
> +
>  /*
>   * Encode and decode a swap entry
>   *
> diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h
> index ff9abc00d139..d92880fbfcde 100644
> --- a/arch/riscv/include/asm/vmalloc.h
> +++ b/arch/riscv/include/asm/vmalloc.h
> @@ -1,4 +1,26 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
>  #ifndef _ASM_RISCV_VMALLOC_H
>  #define _ASM_RISCV_VMALLOC_H
> 
> +#include <linux/pgtable.h>
> +
> +#ifdef CONFIG_SVNAPOT
> +#define arch_vmap_pte_range_map_size vmap_pte_range_map_size
> +static inline unsigned long vmap_pte_range_map_size(unsigned long addr,
> +                                                   unsigned long end,
> +                                                   u64 pfn,
> +                                                   unsigned int max_page_shift)

How about:

static inline unsigned long
vmap_pte_range_map_size(unsigned long addr, unsigned long end, u64 pfn,
			unsigned int max_page_shift)
{
?
> +{
> +       bool is_napot_addr = !(addr & NAPOT_CONT64KB_MASK);
> +       bool pfn_align_napot = !(pfn & (NAPOT_64KB_PTE_NUM - 1UL));
> +       bool space_enough = ((end - addr) >= NAPOT_CONT64KB_SIZE);
> +

tbh I would rather see an early return for each of these failed
tests rather than lumping them all into the condition below. You can
probably do the same for each of them. Personally I find that more
readable than what you have here. /shrug

Thanks,
Conor.

> +       if (has_svnapot() && is_napot_addr && pfn_align_napot &&
> +           space_enough && max_page_shift >= NAPOT_CONT64KB_SHIFT)
> +               return NAPOT_CONT64KB_SIZE;

> +
> +       return PAGE_SIZE;
> +}
> +#endif /*CONFIG_SVNAPOT*/
> +
>  #endif /* _ASM_RISCV_VMALLOC_H */
> --
> 2.35.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime
  2022-08-22 15:34 [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime panqinglin2020
                   ` (3 preceding siblings ...)
  2022-08-22 15:34 ` [PATCH v4 4/4] mm: support Svnapot in huge vmap panqinglin2020
@ 2022-08-22 21:22 ` Conor.Dooley
  2022-08-23  3:07   ` Qinglin Pan
  4 siblings, 1 reply; 13+ messages in thread
From: Conor.Dooley @ 2022-08-22 21:22 UTC (permalink / raw)
  To: panqinglin2020, palmer, linux-riscv; +Cc: jeff, xuyinan

On 22/08/2022 16:34, panqinglin2020@iscas.ac.cn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

Hey Qinglin,
Apologies for yet another mail - but I built allmodconfig
with CONFIG_SVNAPOT=y and got a build error:

  CC      arch/riscv/kernel/asm-offsets.s
In file included from /stuff/linux/arch/riscv/include/asm/tlbflush.h:12,
                 from /stuff/linux/arch/riscv/include/asm/pgtable.h:108,
                 from /stuff/linux/include/linux/pgtable.h:6,
                 from /stuff/linux/include/linux/mm.h:29,
                 from /stuff/linux/arch/riscv/kernel/asm-offsets.c:10:
/stuff/linux/arch/riscv/include/asm/pgtable.h: In function 'pte_pfn':
/stuff/linux/arch/riscv/include/asm/pgtable.h:299:35: error: '_PAGE_NAPOT_SHIFT' undeclared (first use in this function)
  299 |         ALT_SVNAPOT_PTE_PFN(_val, _PAGE_NAPOT_SHIFT,
      |                                   ^~~~~~~~~~~~~~~~~
/stuff/linux/arch/riscv/include/asm/errata_list.h:165:23: note: in definition of macro 'ALT_SVNAPOT_PTE_PFN'
  165 |                   "i"(_napot_shift))
      |                       ^~~~~~~~~~~~
/stuff/linux/arch/riscv/include/asm/pgtable.h:299:35: note: each undeclared identifier is reported only once for each function it appears in
  299 |         ALT_SVNAPOT_PTE_PFN(_val, _PAGE_NAPOT_SHIFT,
      |                                   ^~~~~~~~~~~~~~~~~
/stuff/linux/arch/riscv/include/asm/errata_list.h:165:23: note: in definition of macro 'ALT_SVNAPOT_PTE_PFN'
  165 |                   "i"(_napot_shift))
      |                       ^~~~~~~~~~~~
In file included from /stuff/linux/include/linux/mm.h:29,
                 from /stuff/linux/arch/riscv/kernel/asm-offsets.c:10:
/stuff/linux/include/linux/pgtable.h: At top level:
/stuff/linux/include/linux/pgtable.h:1457:20: error: redefinition of 'p4d_clear_huge'
 1457 | static inline void p4d_clear_huge(p4d_t *p4d) { }
      |                    ^~~~~~~~~~~~~~
In file included from /stuff/linux/include/linux/pgtable.h:6,
                 from /stuff/linux/include/linux/mm.h:29,
                 from /stuff/linux/arch/riscv/kernel/asm-offsets.c:10:
/stuff/linux/arch/riscv/include/asm/pgtable.h:763:20: note: previous definition of 'p4d_clear_huge' with type 'void(p4d_t *)'
  763 | static inline void p4d_clear_huge(p4d_t *p4d) { }
      |                    ^~~~~~~~~~~~~~
make[3]: *** [/stuff/linux/scripts/Makefile.build:117: arch/riscv/kernel/asm-offsets.s] Error 1
make[2]: *** [/stuff/linux/Makefile:1205: prepare0] Error 2
make[2]: Leaving directory '/stuff/brsdk/work/linux'
make[1]: *** [Makefile:222: __sub-make] Error 2
make[1]: Leaving directory '/stuff/linux'
make: *** [Makefile:179: allmodconfig] Error 2

> 
> Svnapot is a RISC-V extension for marking contiguous 4K pages as a non-4K
> page. This patch set is for using Svnapot in Linux Kernel's boot process
> and hugetlb fs.
> 
> This patchset adds a Kconfig item for using Svnapot in
> "Platform type"->"Svnapot support". Its default value is off, and people can set
> it on if they allow kernel to detect Svnapot hardware support and leverage it.
> 
> Tested on:
>   - qemu rv64 with "Svnapot support" off and svnapot=true.
>   - qemu rv64 with "Svnapot support" on and svnapot=true.
>   - qemu rv64 with "Svnapot support" off and svnapot=false.
>   - qemu rv64 with "Svnapot support" on and svnapot=false.
> 
> Changes in v2:
>   - detect Svnapot hardware support at boot time.
> Changes in v3:
>   - do linear mapping again if has_svnapot
> Changes in v4:
>   - fix some errors/warns reported by checkpatch.pl, thanks @Conor
> 
> Qinglin Pan (4):
>   mm: modify pte format for Svnapot
>   mm: support Svnapot in physical page linear-mapping
>   mm: support Svnapot in hugetlb page
>   mm: support Svnapot in huge vmap
> 
>  arch/riscv/Kconfig                   |  10 +-
>  arch/riscv/include/asm/errata_list.h |  24 ++-
>  arch/riscv/include/asm/hugetlb.h     |  30 +++-
>  arch/riscv/include/asm/hwcap.h       |   1 +
>  arch/riscv/include/asm/mmu.h         |   1 +
>  arch/riscv/include/asm/page.h        |   2 +-
>  arch/riscv/include/asm/pgtable-64.h  |  14 ++
>  arch/riscv/include/asm/pgtable.h     |  70 +++++++-
>  arch/riscv/include/asm/vmalloc.h     |  22 +++
>  arch/riscv/kernel/cpu.c              |   1 +
>  arch/riscv/kernel/cpufeature.c       |  18 ++
>  arch/riscv/kernel/setup.c            |  11 +-
>  arch/riscv/mm/hugetlbpage.c          | 250 ++++++++++++++++++++++++++-
>  arch/riscv/mm/init.c                 |  47 +++--
>  14 files changed, 477 insertions(+), 24 deletions(-)
> 
> --
> 2.35.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv

_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: Re: [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime
  2022-08-22 21:22 ` [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime Conor.Dooley
@ 2022-08-23  3:07   ` Qinglin Pan
  0 siblings, 0 replies; 13+ messages in thread
From: Qinglin Pan @ 2022-08-23  3:07 UTC (permalink / raw)
  To: Conor.Dooley, linux-riscv; +Cc: jeff, xuyinan

Hey Conor,
I think something may be wrong with my patchset, and I will fix it and
all the hints you have mentioned in other mails in the next version patchset.

Thank you so much for your review.

Yours,
Qinglin

On 8/23/22 5:22 AM, Conor.Dooley@microchip.com wrote:
> On 22/08/2022 16:34, panqinglin2020@iscas.ac.cn wrote:
>> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
>>
>> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> Hey Qingling,
> Apologies for yet another mail - but I built allmodconfig
> with CONFIG_SVNAPOT=y and got a build error:
> 
>    CC      arch/riscv/kernel/asm-offsets.s
> In file included from /stuff/linux/arch/riscv/include/asm/tlbflush.h:12,
>                   from /stuff/linux/arch/riscv/include/asm/pgtable.h:108,
>                   from /stuff/linux/include/linux/pgtable.h:6,
>                   from /stuff/linux/include/linux/mm.h:29,
>                   from /stuff/linux/arch/riscv/kernel/asm-offsets.c:10:
> /stuff/linux/arch/riscv/include/asm/pgtable.h: In function 'pte_pfn':
> /stuff/linux/arch/riscv/include/asm/pgtable.h:299:35: error: '_PAGE_NAPOT_SHIFT' undeclared (first use in this function)
>    299 |         ALT_SVNAPOT_PTE_PFN(_val, _PAGE_NAPOT_SHIFT,
>        |                                   ^~~~~~~~~~~~~~~~~
> /stuff/linux/arch/riscv/include/asm/errata_list.h:165:23: note: in definition of macro 'ALT_SVNAPOT_PTE_PFN'
>    165 |                   "i"(_napot_shift))
>        |                       ^~~~~~~~~~~~
> /stuff/linux/arch/riscv/include/asm/pgtable.h:299:35: note: each undeclared identifier is reported only once for each function it appears in
>    299 |         ALT_SVNAPOT_PTE_PFN(_val, _PAGE_NAPOT_SHIFT,
>        |                                   ^~~~~~~~~~~~~~~~~
> /stuff/linux/arch/riscv/include/asm/errata_list.h:165:23: note: in definition of macro 'ALT_SVNAPOT_PTE_PFN'
>    165 |                   "i"(_napot_shift))
>        |                       ^~~~~~~~~~~~
> In file included from /stuff/linux/include/linux/mm.h:29,
>                   from /stuff/linux/arch/riscv/kernel/asm-offsets.c:10:
> /stuff/linux/include/linux/pgtable.h: At top level:
> /stuff/linux/include/linux/pgtable.h:1457:20: error: redefinition of 'p4d_clear_huge'
>   1457 | static inline void p4d_clear_huge(p4d_t *p4d) { }
>        |                    ^~~~~~~~~~~~~~
> In file included from /stuff/linux/include/linux/pgtable.h:6,
>                   from /stuff/linux/include/linux/mm.h:29,
>                   from /stuff/linux/arch/riscv/kernel/asm-offsets.c:10:
> /stuff/linux/arch/riscv/include/asm/pgtable.h:763:20: note: previous definition of 'p4d_clear_huge' with type 'void(p4d_t *)'
>    763 | static inline void p4d_clear_huge(p4d_t *p4d) { }
>        |                    ^~~~~~~~~~~~~~
> make[3]: *** [/stuff/linux/scripts/Makefile.build:117: arch/riscv/kernel/asm-offsets.s] Error 1
> make[2]: *** [/stuff/linux/Makefile:1205: prepare0] Error 2
> make[2]: Leaving directory '/stuff/brsdk/work/linux'
> make[1]: *** [Makefile:222: __sub-make] Error 2
> make[1]: Leaving directory '/stuff/linux'
> make: *** [Makefile:179: allmodconfig] Error 2
> 
>>
>> Svnapot is a RISC-V extension for marking contiguous 4K pages as a non-4K
>> page. This patch set is for using Svnapot in Linux Kernel's boot process
>> and hugetlb fs.
>>
>> This patchset adds a Kconfig item for using Svnapot in
>> "Platform type"->"Svnapot support". Its default value is off, and people can set
>> it on if they allow kernel to detect Svnapot hardware support and leverage it.
>>
>> Tested on:
>>    - qemu rv64 with "Svnapot support" off and svnapot=true.
>>    - qemu rv64 with "Svnapot support" on and svnapot=true.
>>    - qemu rv64 with "Svnapot support" off and svnapot=false.
>>    - qemu rv64 with "Svnapot support" on and svnapot=false.
>>
>> Changes in v2:
>>    - detect Svnapot hardware support at boot time.
>> Changes in v3:
>>    - do linear mapping again if has_svnapot
>> Changes in v4:
>>    - fix some errors/warns reported by checkpatch.pl, thanks @Conor
>>
>> Qinglin Pan (4):
>>    mm: modify pte format for Svnapot
>>    mm: support Svnapot in physical page linear-mapping
>>    mm: support Svnapot in hugetlb page
>>    mm: support Svnapot in huge vmap
>>
>>   arch/riscv/Kconfig                   |  10 +-
>>   arch/riscv/include/asm/errata_list.h |  24 ++-
>>   arch/riscv/include/asm/hugetlb.h     |  30 +++-
>>   arch/riscv/include/asm/hwcap.h       |   1 +
>>   arch/riscv/include/asm/mmu.h         |   1 +
>>   arch/riscv/include/asm/page.h        |   2 +-
>>   arch/riscv/include/asm/pgtable-64.h  |  14 ++
>>   arch/riscv/include/asm/pgtable.h     |  70 +++++++-
>>   arch/riscv/include/asm/vmalloc.h     |  22 +++
>>   arch/riscv/kernel/cpu.c              |   1 +
>>   arch/riscv/kernel/cpufeature.c       |  18 ++
>>   arch/riscv/kernel/setup.c            |  11 +-
>>   arch/riscv/mm/hugetlbpage.c          | 250 ++++++++++++++++++++++++++-
>>   arch/riscv/mm/init.c                 |  47 +++--
>>   14 files changed, 477 insertions(+), 24 deletions(-)
>>
>> --
>> 2.35.1
>>
>>
>> _______________________________________________
>> linux-riscv mailing list
>> linux-riscv@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-riscv
> 


_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH v4 1/4] mm: modify pte format for Svnapot
  2022-08-22 15:34 ` [PATCH v4 1/4] mm: modify pte format for Svnapot panqinglin2020
  2022-08-22 20:45   ` Conor.Dooley
  2022-08-22 20:56   ` Conor.Dooley
@ 2022-08-24 17:37   ` Heiko Stübner
  2 siblings, 0 replies; 13+ messages in thread
From: Heiko Stübner @ 2022-08-24 17:37 UTC (permalink / raw)
  To: palmer, linux-riscv; +Cc: jeff, xuyinan, Qinglin Pan, panqinglin2020

Hi,

Am Montag, 22. August 2022, 17:34:10 CEST schrieb panqinglin2020@iscas.ac.cn:
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> This commit adds two erratas to enable/disable svnapot support, patches code
> dynamicly when "svnapot" is in the "riscv,isa" field of fdt and SVNAPOT
> compile option is set. It will influence the behavior of has_svnapot
> function and pte_pfn function. All code dependent on svnapot should make
> sure that has_svnapot return true firstly.
> 
> Also, this commit modifies PTE definition for Svnapot, and creates some
> functions in pgtable.h to mark a PTE as napot and check if it is a Svnapot
> PTE. Until now, only 64KB napot size is supported in draft spec, so some
> macros has only 64KB version.
> 
> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index ed66c31e4655..c43708ae7f38 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -432,6 +432,13 @@ config FPU
>  
>  	  If you don't know what to do here, say Y.
>  
> +config SVNAPOT

as Conor already wrote, please make this RISCV_ISA_SVNAPOT,
and maybe also move it upwards a bit so that we get some sorting going
for all the extensions :-) .


> +	bool "Svnapot support"
> +	default n
> +	help
> +	  Select if your CPU supports Svnapot and you want to enable it when
> +	  kernel is booting.


please make this a bit more verbose, something like:

  Add support to dynamically detect the presence of the SVNAPOT
  ISA-extension (Supervisor-mode: NAPOT Translation Contiguity)
  and enable its usage.

[plus add a paragraph explaining what SVNAPOT helps with]


>  endmenu # "Platform type"
>  
>  menu "Kernel features"
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index 19a771085781..f22723174cd9 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -22,7 +22,8 @@
>  
>  #define	CPUFEATURE_SVPBMT 0
>  #define	CPUFEATURE_ZICBOM 1
> -#define	CPUFEATURE_NUMBER 2
> +#define	CPUFEATURE_SVNAPOT 2
> +#define	CPUFEATURE_NUMBER 3
>  
>  #ifdef __ASSEMBLY__
>  
> @@ -142,6 +143,27 @@ asm volatile(ALTERNATIVE_2(						\
>  	    "r"((unsigned long)(_start) + (_size))			\
>  	: "a0")
>  
> +#define ALT_SVNAPOT(_val)						\
> +asm(ALTERNATIVE("li %0, 0", "li %0, 1", 0,				\
> +		CPUFEATURE_SVNAPOT, CONFIG_SVNAPOT)			\
> +		: "=r"(_val) :)
> +
> +#define ALT_SVNAPOT_PTE_PFN(_val, _napot_shift, _pfn_mask, _pfn_shift)	\
> +asm(ALTERNATIVE("and %0, %1, %2\n\t"					\
> +		"srli %0, %0, %3\n\t"					\
> +		"nop\n\tnop\n\tnop",					\

using the new-ish __nops macro might make this a tad nicer to read, see
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/riscv/include/asm/errata_list.h#n122

> +		"srli t3, %1, %4\n\t"					\
> +		"and %0, %1, %2\n\t"					\
> +		"srli %0, %0, %3\n\t"					\
> +		"sub  t4, %0, t3\n\t"					\
> +		"and  %0, %0, t4",					\
> +		0, CPUFEATURE_SVNAPOT, CONFIG_SVNAPOT)			\
> +		: "+r"(_val)						\
> +		: "r"(_val),						\
> +		  "r"(_pfn_mask),					\
> +		  "i"(_pfn_shift),					\
> +		  "i"(_napot_shift))
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif
> diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> index 6f59ec64175e..e4c7ce5a7e1a 100644
> --- a/arch/riscv/include/asm/hwcap.h
> +++ b/arch/riscv/include/asm/hwcap.h
> @@ -58,6 +58,7 @@ enum riscv_isa_ext_id {
>  	RISCV_ISA_EXT_ZICBOM,
>  	RISCV_ISA_EXT_ZIHINTPAUSE,
>  	RISCV_ISA_EXT_SSTC,
> +	RISCV_ISA_EXT_SVNAPOT,

that list is only a kernel-internal list, so we can probably keep some
sorting for extensions.


Heiko



_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-08-24 17:38 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-22 15:34 [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime panqinglin2020
2022-08-22 15:34 ` [PATCH v4 1/4] mm: modify pte format for Svnapot panqinglin2020
2022-08-22 20:45   ` Conor.Dooley
2022-08-22 20:56   ` Conor.Dooley
2022-08-24 17:37   ` Heiko Stübner
2022-08-22 15:34 ` [PATCH v4 2/4] mm: support Svnapot in physical page linear-mapping panqinglin2020
2022-08-22 21:03   ` Conor.Dooley
2022-08-22 15:34 ` [PATCH v4 3/4] mm: support Svnapot in hugetlb page panqinglin2020
2022-08-22 21:08   ` Conor.Dooley
2022-08-22 15:34 ` [PATCH v4 4/4] mm: support Svnapot in huge vmap panqinglin2020
2022-08-22 21:13   ` Conor.Dooley
2022-08-22 21:22 ` [PATCH v4 0/4] riscv, mm: detect svnapot cpu support at runtime Conor.Dooley
2022-08-23  3:07   ` Qinglin Pan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.