linux-kernel.vger.kernel.org archive mirror
* [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc
From: Maxwell Bland @ 2024-04-02 20:15 UTC
  To: linux-mm
  Cc: Maxwell Bland, linux-kernel, Andrew Morton, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes

Makes red-black tree allocation more flexible on a per-architecture
basis by introducing optional hooks to refine the red-black tree
structure and by exposing vmalloc functions for clipping, finding, and
inserting vmap areas.

With this patch, the red-black vmap tree can be refined to account for
architecture-specific memory management operations, most notably
address space layout randomization, as these features conflict with
the generic management of a single vmalloc_start to vmalloc_end range
by mm/vmalloc.c.

For example, x86 is forced to restrict ASLR to 1024 possible
locations, a very small search space, and arm64 breaks standard
code/data partitioning altogether, which prevents the performant
enforcement of immutability on kernel page tables.
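
For illustration, an architecture opts in by defining both hooks in
its asm/vmalloc.h. The following is only a minimal sketch, not part of
this series: ARCH_EXEC_START/ARCH_EXEC_END are hypothetical window
bounds, and the real arm64 wiring appears in patch 2.

	/* asm/vmalloc.h (sketch, assuming a reserved executable window) */
	#define arch_skip_va arch_skip_va
	static inline bool arch_skip_va(struct vmap_area *va,
					unsigned long vstart)
	{
		/*
		 * Hide free areas inside the window from generic
		 * allocations; only callers passing the window start
		 * as vstart may allocate from it.
		 */
		return vstart != ARCH_EXEC_START &&
		       va->va_start >= ARCH_EXEC_START &&
		       va->va_end <= ARCH_EXEC_END;
	}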

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 include/linux/vmalloc.h | 24 ++++++++++++++++++++++++
 mm/vmalloc.c            | 16 ++++++++++------
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 98ea90e90439..3c5ce7ee0bea 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -12,6 +12,7 @@
 
 #include <asm/vmalloc.h>
 
+struct kmem_cache;
 struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
 struct notifier_block;		/* in notifier.h */
 struct iov_iter;		/* in uio.h */
@@ -125,6 +126,21 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
 }
 #endif
 
+#ifndef arch_skip_va
+static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
+{
+	return false;
+}
+#endif
+
+#ifndef arch_refine_vmap_space
+static inline void arch_refine_vmap_space(struct rb_root *root,
+					  struct list_head *head,
+					  struct kmem_cache *cachep)
+{
+}
+#endif
+
 /*
  *	Highlevel APIs for driver use
  */
@@ -214,6 +230,14 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
 void free_vm_area(struct vm_struct *area);
 extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
+extern void insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
+				     struct rb_root *root,
+				     struct list_head *head);
+extern int va_clip(struct rb_root *root, struct list_head *head,
+		   struct vmap_area *va, unsigned long nva_start_addr,
+		   unsigned long size);
+extern struct vmap_area *__find_vmap_area(unsigned long addr,
+					  struct rb_root *root);
 struct vmap_area *find_vmap_area(unsigned long addr);
 
 static inline bool is_vm_area_hugepages(const void *addr)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 68fa001648cc..de4577a3708e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -989,7 +989,7 @@ unsigned long vmalloc_nr_pages(void)
 	return atomic_long_read(&nr_vmalloc_pages);
 }
 
-static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
+struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
 {
 	struct rb_node *n = root->rb_node;
 
@@ -1322,7 +1322,7 @@ insert_vmap_area(struct vmap_area *va,
 		link_va(va, root, parent, link, head);
 }
 
-static void
+void
 insert_vmap_area_augment(struct vmap_area *va,
 	struct rb_node *from, struct rb_root *root,
 	struct list_head *head)
@@ -1501,7 +1501,7 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
 				vstart < va->va_start) {
 			node = node->rb_left;
 		} else {
-			if (is_within_this_va(va, size, align, vstart))
+			if (!arch_skip_va(va, vstart) && is_within_this_va(va, size, align, vstart))
 				return va;
 
 			/*
@@ -1522,7 +1522,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
 			 */
 			while ((node = rb_parent(node))) {
 				va = rb_entry(node, struct vmap_area, rb_node);
-				if (is_within_this_va(va, size, align, vstart))
+				if (!arch_skip_va(va, vstart) &&
+				    is_within_this_va(va, size, align, vstart))
 					return va;
 
 				if (get_subtree_max_size(node->rb_right) >= length &&
@@ -1554,7 +1555,7 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
 	struct vmap_area *va;
 
 	list_for_each_entry(va, head, list) {
-		if (!is_within_this_va(va, size, align, vstart))
+		if (arch_skip_va(va, vstart) || !is_within_this_va(va, size, align, vstart))
 			continue;
 
 		return va;
@@ -1617,7 +1618,7 @@ classify_va_fit_type(struct vmap_area *va,
 	return type;
 }
 
-static __always_inline int
+__always_inline int
 va_clip(struct rb_root *root, struct list_head *head,
 		struct vmap_area *va, unsigned long nva_start_addr,
 		unsigned long size)
@@ -5129,4 +5130,7 @@ void __init vmalloc_init(void)
 	vmap_node_shrinker->count_objects = vmap_node_shrink_count;
 	vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
 	shrinker_register(vmap_node_shrinker);
+
+	arch_refine_vmap_space(&free_vmap_area_root, &free_vmap_area_list,
+			       vmap_area_cachep);
 }
-- 
2.39.2



* [PATCH 2/5] arm64: mm: code and data partitioning for aslr
From: Maxwell Bland @ 2024-04-03 21:08 UTC
  To: linux-arm-kernel
  Cc: Maxwell Bland, linux-kernel, Catalin Marinas, Will Deacon,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Zi Shen Lim, Mark Rutland, Ard Biesheuvel, Maxwell Bland,
	Kees Cook, Sami Tolvanen, Baoquan He, Jonathan Cameron,
	Greg Kroah-Hartman, Ryo Takakura, James Morse, Christophe Leroy,
	bpf

Uses hooks in the vmalloc infrastructure to prevent the interleaving
of code and data pages. This both maintains the management assumptions
made by non-arch-specific code and makes management of these regions
more precise and conformant, allowing, for example, PXNTable bits to
be maintained on dynamically allocated memory and certain page middle
directory and higher-level descriptors to be kept immutable.
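
With these hooks the allocator maintains the invariant sketched below
(illustrative only; the assertion is not part of this patch):

	/* Executable allocations are drawn from the module window... */
	void *code = module_alloc(PAGE_SIZE);

	/*
	 * ...while arch_skip_va() steers generic data allocations
	 * around that window entirely.
	 */
	void *data = vmalloc(PAGE_SIZE);
	BUG_ON((u64)data >= MODULES_ASLR_START &&
	       (u64)data < MODULES_ASLR_END);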

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 arch/arm64/include/asm/module.h    | 12 +++++
 arch/arm64/include/asm/vmalloc.h   | 17 ++++++-
 arch/arm64/kernel/Makefile         |  2 +-
 arch/arm64/kernel/module.c         |  7 ++-
 arch/arm64/kernel/probes/kprobes.c |  7 +--
 arch/arm64/kernel/setup.c          |  4 ++
 arch/arm64/kernel/vmalloc.c        | 71 ++++++++++++++++++++++++++++++
 arch/arm64/mm/ptdump.c             |  4 +-
 arch/arm64/net/bpf_jit_comp.c      |  8 ++--
 9 files changed, 117 insertions(+), 15 deletions(-)
 create mode 100644 arch/arm64/kernel/vmalloc.c

diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 79550b22ba19..e50d7a240ad7 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -65,4 +65,16 @@ static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
 	return NULL;
 }
 
+extern u64 module_direct_base __ro_after_init;
+extern u64 module_plt_base __ro_after_init;
+
+int __init module_init_limits(void);
+
+#define MODULES_ASLR_START ((module_plt_base) ? module_plt_base : \
+		module_direct_base)
+#define MODULES_ASLR_END ((module_plt_base) ? module_plt_base + SZ_2G : \
+		module_direct_base + SZ_128M)
+
+void *module_alloc(unsigned long size);
+
 #endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 38fafffe699f..93f8f1e2b1ce 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -4,6 +4,9 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
+struct vmap_area;
+struct kmem_cache;
+
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 
 #define arch_vmap_pud_supported arch_vmap_pud_supported
@@ -23,7 +26,7 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
 	return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
 }
 
-#endif
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
 
 #define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
 static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
@@ -31,4 +34,16 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
 	return pgprot_tagged(prot);
 }
 
+#ifdef CONFIG_RANDOMIZE_BASE
+
+#define arch_skip_va arch_skip_va
+bool arch_skip_va(struct vmap_area *va, unsigned long vstart);
+
+#define arch_refine_vmap_space arch_refine_vmap_space
+void arch_refine_vmap_space(struct rb_root *root,
+			    struct list_head *head,
+			    struct kmem_cache *cachep);
+
+#endif /* CONFIG_RANDOMIZE_BASE */
+
 #endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 763824963ed1..4298a2168544 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -56,7 +56,7 @@ obj-$(CONFIG_ACPI)			+= acpi.o
 obj-$(CONFIG_ACPI_NUMA)			+= acpi_numa.o
 obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)			+= paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
+obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o vmalloc.o
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 obj-$(CONFIG_ELF_CORE)			+= elfcore.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..58329b27624d 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -26,8 +26,8 @@
 #include <asm/scs.h>
 #include <asm/sections.h>
 
-static u64 module_direct_base __ro_after_init = 0;
-static u64 module_plt_base __ro_after_init = 0;
+u64 module_direct_base __ro_after_init;
+u64 module_plt_base __ro_after_init;
 
 /*
  * Choose a random page-aligned base address for a window of 'size' bytes which
@@ -66,7 +66,7 @@ static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
  * we may fall back to PLTs where they could have been avoided, but this keeps
  * the logic significantly simpler.
  */
-static int __init module_init_limits(void)
+int __init module_init_limits(void)
 {
 	u64 kernel_end = (u64)_end;
 	u64 kernel_start = (u64)_text;
@@ -108,7 +108,6 @@ static int __init module_init_limits(void)
 
 	return 0;
 }
-subsys_initcall(module_init_limits);
 
 void *module_alloc(unsigned long size)
 {
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 327855a11df2..89968f05177f 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -131,9 +131,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 
 void *alloc_insn_page(void)
 {
-	return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-			GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
-			NUMA_NO_NODE, __builtin_return_address(0));
+	return __vmalloc_node_range(PAGE_SIZE, 1, MODULES_ASLR_START,
+			MODULES_ASLR_END, GFP_KERNEL, PAGE_KERNEL_ROX,
+			VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+			__builtin_return_address(0));
 }
 
 /* arm kprobe: install breakpoint in text */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 65a052bf741f..908ee0ccc606 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -53,6 +53,7 @@
 #include <asm/efi.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/mmu_context.h>
+#include <asm/module.h>
 
 static int num_standard_resources;
 static struct resource *standard_resources;
@@ -321,6 +322,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 
 	arm64_memblock_init();
 
+
 	paging_init();
 
 	acpi_table_upgrade();
@@ -366,6 +368,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 			"This indicates a broken bootloader or old kernel\n",
 			boot_args[1], boot_args[2], boot_args[3]);
 	}
+
+	module_init_limits();
 }
 
 static inline bool cpu_can_disable(unsigned int cpu)
diff --git a/arch/arm64/kernel/vmalloc.c b/arch/arm64/kernel/vmalloc.c
new file mode 100644
index 000000000000..00a463f3692f
--- /dev/null
+++ b/arch/arm64/kernel/vmalloc.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AArch64 vmap area management code
+ *
+ * Author: Maxwell Bland <mbland@motorola.com>
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/elf.h>
+
+#include <asm/module.h>
+
+/*
+ * Prevents the allocation of new vmap_areas from dynamic code
+ * region if the virtual address requested is not explicitly the
+ * module region.
+ */
+bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
+{
+	return (vstart != MODULES_ASLR_START &&
+			va->va_start >= MODULES_ASLR_START &&
+			va->va_end <= MODULES_ASLR_END);
+}
+
+/*
+ * Splits a vmap area in two and allocates a new area if needed
+ */
+static struct vmap_area *
+try_split_alloc_vmap_area(struct rb_root *root,
+		struct list_head *head,
+		struct kmem_cache *vmap_area_cachep,
+		unsigned long addr)
+{
+	struct vmap_area *va;
+	int ret;
+	struct vmap_area *lva = NULL;
+
+	va = __find_vmap_area(addr, root);
+	if (!va) {
+		pr_err("%s: could not find vmap\n", __func__);
+		return NULL;
+	}
+
+	lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
+	if (!lva) {
+		pr_err("%s: unable to allocate va for range\n", __func__);
+		return NULL;
+	}
+	lva->va_start = addr;
+	lva->va_end = va->va_end;
+	ret = va_clip(root, head, va, addr, va->va_end - addr);
+	if (WARN_ON_ONCE(ret)) {
+		pr_err("%s: unable to clip code base region\n", __func__);
+		kmem_cache_free(vmap_area_cachep, lva);
+		return NULL;
+	}
+	insert_vmap_area_augment(lva, NULL, root, head);
+	return lva;
+}
+
+/*
+ * Run during vmalloc_init, ensures that there exist explicit rb tree
+ * node delineations between code and data
+ */
+void arch_refine_vmap_space(struct rb_root *root,
+		struct list_head *head,
+		struct kmem_cache *cachep)
+{
+	try_split_alloc_vmap_area(root, head, cachep, MODULES_ASLR_START);
+	try_split_alloc_vmap_area(root, head, cachep, MODULES_ASLR_END);
+}
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 6986827e0d64..796231a4fd63 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -261,9 +261,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
 		}
 		pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
 				   pg_level[st->level].name);
-		if (st->current_prot && pg_level[st->level].bits)
-			dump_prot(st, pg_level[st->level].bits,
-				  pg_level[st->level].num);
+		dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num);
 		pt_dump_seq_puts(st->seq, "\n");
 
 		if (addr >= st->marker[1].start_address) {
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 122021f9bdfc..6ed6e00b8b4a 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -13,6 +13,8 @@
 #include <linux/memory.h>
 #include <linux/printk.h>
 #include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/moduleloader.h>
 
 #include <asm/asm-extable.h>
 #include <asm/byteorder.h>
@@ -1790,18 +1792,18 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 
 u64 bpf_jit_alloc_exec_limit(void)
 {
-	return VMALLOC_END - VMALLOC_START;
+	return MODULES_ASLR_END - MODULES_ASLR_START;
 }
 
 void *bpf_jit_alloc_exec(unsigned long size)
 {
 	/* Memory is intended to be executable, reset the pointer tag. */
-	return kasan_reset_tag(vmalloc(size));
+	return kasan_reset_tag(module_alloc(size));
 }
 
 void bpf_jit_free_exec(void *addr)
 {
-	return vfree(addr);
+	return module_memfree(addr);
 }
 
 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
-- 
2.39.2



* [PATCH 3/5] mm: add vaddr param to pmd_populate_kernel
From: Maxwell Bland @ 2024-04-05 18:37 UTC
  To: linux-mm
  Cc: Maxwell Bland, Richard Henderson, Ivan Kokshaysky, Matt Turner,
	Vineet Gupta, Alexander Potapenko, Marco Elver, Dmitry Vyukov,
	Russell King, Andrey Ryabinin, Andrey Konovalov,
	Vincenzo Frascino, Catalin Marinas, Will Deacon, Guo Ren,
	Brian Cain, Huacai Chen, WANG Xuerui, Geert Uytterhoeven,
	Sam Creasey, Michal Simek, Thomas Bogendoerfer, Dinh Nguyen,
	Jonas Bonn, Stefan Kristiansson, Stafford Horne,
	James E.J. Bottomley, Helge Deller, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Aneesh Kumar K.V,
	Naveen N. Rao, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
	Christian Borntraeger, Sven Schnelle, Yoshinori Sato,
	Rich Felker, John Paul Adrian Glaubitz, David S. Miller,
	Andreas Larsson, Richard Weinberger, Anton Ivanov, Johannes Berg,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Andy Lutomirski, Peter Zijlstra, Chris Zankel,
	Max Filippov, Andrew Morton, Muchun Song, Dennis Zhou, Tejun Heo,
	Christoph Lameter, Maxwell Bland, Linus Walleij,
	David Hildenbrand, Arnd Bergmann, Ard Biesheuvel, Ryan Roberts,
	Mark Rutland, Nikhil V, Rick Edgecombe, Baolin Wang, Bibo Mao,
	Tianrui Zhao, Randy Dunlap, Vlastimil Babka, Kent Overstreet,
	Peter Xu, Jiangfeng Xiao, Alexandre Ghiti, Jisheng Zhang,
	Conor Dooley, Mason Huo, Sia Jee Heng, Song Shuai,
	Gerald Schaefer, Qi Zheng, Hugh Dickins, Jason Gunthorpe,
	Breno Leitao, Josh Poimboeuf, linux-alpha, linux-kernel,
	linux-snps-arc, kasan-dev, linux-arm-kernel, linux-csky,
	linux-hexagon, loongarch, linux-m68k, linux-mips, kvm,
	linux-openrisc, linux-parisc, linuxppc-dev, linux-riscv,
	linux-s390, linux-sh, sparclinux, linux-um

This patch affords each architecture the ability to condition the
population of page middle directory entries on the virtual address
being allocated, matching the existing PTE infrastructure. This
removes the need to perform a reverse page table walk in cases where
the population context is not readily accessible, e.g. dynamic vmalloc
calls on arm64.

To achieve this goal, it modifies every call to and implementation of
the pmd_populate_kernel function across architectures, ensuring
uniform adoption across all kernel deployments.
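
A sketch of the updated calling convention, mirroring the existing
call sites (pmd and vaddr are whatever the caller already has in hand;
vaddr is the virtual address the new PTE table will map):

	pte_t *new = pte_alloc_one_kernel(&init_mm);

	/*
	 * The extra argument hands arch code the mapped virtual
	 * address directly, so it need not recover it with a reverse
	 * page table walk.
	 */
	pmd_populate_kernel(&init_mm, pmd, new, vaddr);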

Signed-off-by: Maxwell Bland <mbland@motorola.com>

---

Hi all,

Thank you for taking the time to review this change. It affects many
subarchitectures, so the maintainers list is large. Apologies in
advance if there is a specific maintainer I should have contacted
directly about deployment across subprojects.

The reason for such a sweeping change comes from
lore.kernel.org/all/cf5409c3-254a-459b-8969-429db2ec6439@redhat.com

It is my understanding as well that some subarchitectures may have
separate "next" or development branches ahead of mainline Linux.
Please let me know if a cherry-pick to such a branch is desired and I
will do my best to check it out and deploy there as needed.

 arch/alpha/include/asm/pgalloc.h             |  5 +++--
 arch/arc/include/asm/pgalloc.h               |  3 ++-
 arch/arc/mm/highmem.c                        |  2 +-
 arch/arm/include/asm/kfence.h                |  2 +-
 arch/arm/include/asm/pgalloc.h               |  3 ++-
 arch/arm/mm/kasan_init.c                     |  2 +-
 arch/arm/mm/mmu.c                            |  2 +-
 arch/arm64/include/asm/pgalloc.h             |  3 ++-
 arch/arm64/mm/trans_pgd.c                    |  2 +-
 arch/csky/include/asm/pgalloc.h              |  2 +-
 arch/hexagon/include/asm/pgalloc.h           |  2 +-
 arch/loongarch/include/asm/pgalloc.h         |  3 ++-
 arch/loongarch/mm/init.c                     |  2 +-
 arch/loongarch/mm/kasan_init.c               |  2 +-
 arch/m68k/include/asm/mcf_pgalloc.h          |  2 +-
 arch/m68k/include/asm/motorola_pgalloc.h     |  3 ++-
 arch/m68k/include/asm/sun3_pgalloc.h         |  3 ++-
 arch/microblaze/include/asm/pgalloc.h        |  2 +-
 arch/mips/include/asm/pgalloc.h              |  2 +-
 arch/mips/kvm/mmu.c                          |  2 +-
 arch/nios2/include/asm/pgalloc.h             |  2 +-
 arch/openrisc/include/asm/pgalloc.h          |  2 +-
 arch/parisc/include/asm/pgalloc.h            |  5 +++--
 arch/parisc/mm/init.c                        |  6 +++---
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/book3s/64/pgalloc.h |  2 +-
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  2 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c     |  2 +-
 arch/powerpc/mm/kasan/init_32.c              |  4 ++--
 arch/powerpc/mm/kasan/init_book3e_64.c       |  9 ++++++---
 arch/powerpc/mm/kasan/init_book3s_64.c       |  7 +++++--
 arch/powerpc/mm/nohash/book3e_pgtable.c      |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 ++--
 arch/riscv/include/asm/pgalloc.h             |  2 +-
 arch/riscv/kernel/hibernate.c                |  2 +-
 arch/s390/include/asm/pgalloc.h              |  2 +-
 arch/sh/include/asm/pgalloc.h                |  2 +-
 arch/sh/mm/init.c                            |  2 +-
 arch/sparc/include/asm/pgalloc_32.h          |  3 ++-
 arch/sparc/include/asm/pgalloc_64.h          |  4 ++--
 arch/sparc/mm/init_64.c                      |  8 ++++----
 arch/um/include/asm/pgalloc.h                |  4 ++--
 arch/x86/include/asm/pgalloc.h               |  3 ++-
 arch/x86/mm/init_64.c                        | 14 +++++++++++---
 arch/x86/mm/ioremap.c                        |  2 +-
 arch/x86/mm/kasan_init_64.c                  |  2 +-
 arch/xtensa/include/asm/pgalloc.h            |  2 +-
 include/linux/mm.h                           |  4 ++--
 mm/hugetlb_vmemmap.c                         |  4 ++--
 mm/kasan/init.c                              | 14 +++++++++-----
 mm/memory.c                                  |  4 ++--
 mm/percpu.c                                  |  2 +-
 mm/pgalloc-track.h                           |  3 ++-
 mm/sparse-vmemmap.c                          |  2 +-
 55 files changed, 107 insertions(+), 78 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index 68be7adbfe58..1d3d86cad3cc 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -7,7 +7,7 @@
 
 #include <asm-generic/pgalloc.h>
 
-/*      
+/*
  * Allocate and free page tables. The xxx_kernel() versions are
  * used to allocate a kernel page table - this turns on ASN bits
  * if any.
@@ -20,7 +20,8 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte)
 }
 
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte,
+		    unsigned long vaddr)
 {
 	pmd_set(pmd, pte);
 }
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 096b8ef58edb..c0ebfa44b204 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -34,7 +34,8 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte,
+		    unsigned long vaddr)
 {
 	/*
 	 * The cast to long below is OK in 32-bit PAE40 regime with long long pte
diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index c79912a6b196..2d327cf35722 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -57,7 +57,7 @@ static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
 		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 		      __func__, PAGE_SIZE, PAGE_SIZE);
 
-	pmd_populate_kernel(&init_mm, pmd_k, pte_k);
+	pmd_populate_kernel(&init_mm, pmd_k, pte_k, kvaddr);
 	return pte_k;
 }
 
diff --git a/arch/arm/include/asm/kfence.h b/arch/arm/include/asm/kfence.h
index 7980d0f2271f..dd4e4325d354 100644
--- a/arch/arm/include/asm/kfence.h
+++ b/arch/arm/include/asm/kfence.h
@@ -19,7 +19,7 @@ static inline int split_pmd_page(pmd_t *pmd, unsigned long addr)
 
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		set_pte_ext(pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0);
-	pmd_populate_kernel(&init_mm, pmd, pte);
+	pmd_populate_kernel(&init_mm, pmd, pte, addr);
 
 	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
 	return 0;
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index a17f01235c29..0a88346db17e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -122,7 +122,8 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
  * Ensure that we always set both PMD entries.
  */
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep,
+		    unsigned long vaddr)
 {
 	/*
 	 * The pmd must be loaded with the physical address of the PTE table
diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c
index 111d4f703136..9b3af2dce71e 100644
--- a/arch/arm/mm/kasan_init.c
+++ b/arch/arm/mm/kasan_init.c
@@ -111,7 +111,7 @@ static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
 				      __func__, addr);
 				return;
 			}
-			pmd_populate_kernel(&init_mm, pmdp, p);
+			pmd_populate_kernel(&init_mm, pmdp, p, addr);
 			flush_pmd_entry(pmdp);
 		}
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index c24e29c0b9a4..3cfed8dc4a19 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -384,7 +384,7 @@ void __init early_fixmap_init(void)
 		     != FIXADDR_TOP >> PMD_SHIFT);
 
 	pmd = fixmap_pmd(FIXADDR_TOP);
-	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+	pmd_populate_kernel(&init_mm, pmd, bm_pte, __fix_to_virt(FIXADDR_TOP));
 
 	pte_offset_fixmap = pte_offset_early_fixmap;
 }
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 8ff5f2a2579e..5785272144e8 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -124,7 +124,8 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
  * of the mm address space.
  */
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep,
+		    unsigned long vaddr)
 {
 	VM_BUG_ON(mm && mm != &init_mm);
 	__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index 5139a28130c0..f84244d13099 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -69,7 +69,7 @@ static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp,
 	dst_ptep = trans_alloc(info);
 	if (!dst_ptep)
 		return -ENOMEM;
-	pmd_populate_kernel(NULL, dst_pmdp, dst_ptep);
+	pmd_populate_kernel(NULL, dst_pmdp, dst_ptep, addr);
 	dst_ptep = pte_offset_kernel(dst_pmdp, start);
 
 	src_ptep = pte_offset_kernel(src_pmdp, start);
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index 9c84c9012e53..f2c244c58acf 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -11,7 +11,7 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-					pte_t *pte)
+					pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd(__pa(pte)));
 }
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index 55988625e6fb..2be773a5ffeb 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -62,7 +62,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
  * kernel map of the active thread who's calling pmd_populate_kernel...
  */
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	extern spinlock_t kmap_gen_lock;
 	pmd_t *ppmd;
diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index 4e2d6b7ca2ee..6384391e69bd 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -13,7 +13,8 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
-				       pmd_t *pmd, pte_t *pte)
+				       pmd_t *pmd, pte_t *pte,
+				       unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((unsigned long)pte));
 }
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 4dd53427f657..b8952899b120 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -200,7 +200,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr)
 		pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 		if (!pte)
 			panic("%s: Failed to allocate memory\n", __func__);
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel(&init_mm, pmd, pte, addr);
 	}
 
 	return pte_offset_kernel(pmd, addr);
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
index c608adc99845..51d40ff43aa9 100644
--- a/arch/loongarch/mm/kasan_init.c
+++ b/arch/loongarch/mm/kasan_init.c
@@ -110,7 +110,7 @@ static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
 				__pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
 		if (!early)
 			memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
-		pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
+		pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys), addr);
 	}
 
 	return pte_offset_kernel(pmdp, addr);
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 302c5bf67179..989a1aaa8aa1 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -30,7 +30,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 
 #define pmd_populate(mm, pmd, pte) (pmd_val(*pmd) = (unsigned long)(pte))
 
-#define pmd_populate_kernel pmd_populate
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) pmd_populate(mm, pmd, pte)
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pgtable,
 				  unsigned long address)
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 74a817d9387f..74aec6965981 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -79,7 +79,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 }
 
 
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+				       pte_t *pte, unsigned long vaddr)
 {
 	pmd_set(pmd, pte);
 }
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 4a137eecb6fe..550283e8bf4d 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -23,7 +23,8 @@ do {								\
 	tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));	\
 } while (0)
 
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+				       pte_t *pte, unsigned long vaddr)
 {
 	pmd_val(*pmd) = __pa((unsigned long)pte);
 }
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 6c33b05f730f..b3cc2cd8fc50 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -35,7 +35,7 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 #define pmd_populate(mm, pmd, pte) \
 			(pmd_val(*(pmd)) = (unsigned long)page_address(pte))
 
-#define pmd_populate_kernel(mm, pmd, pte) \
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) \
 		(pmd_val(*(pmd)) = (unsigned long) (pte))
 
 #endif /* _ASM_MICROBLAZE_PGALLOC_H */
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index f4440edcd8fe..fb71c8776a04 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -19,7 +19,7 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-	pte_t *pte)
+	pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((unsigned long)pte));
 }
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 467ee6b95ae1..47f48929a124 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -133,7 +133,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
 			return NULL;
 		new_pte = kvm_mmu_memory_cache_alloc(cache);
 		clear_page(new_pte);
-		pmd_populate_kernel(NULL, pmd, new_pte);
+		pmd_populate_kernel(NULL, pmd, new_pte, addr);
 	}
 	return pte_offset_kernel(pmd, addr);
 }
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index ce6bb8e74271..ea99d36a6fdd 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -15,7 +15,7 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-	pte_t *pte)
+	pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((unsigned long)pte));
 }
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index c6a73772a546..304cf8955bec 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -25,7 +25,7 @@
 
 extern int mem_init_done;
 
-#define pmd_populate_kernel(mm, pmd, pte) \
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) \
 	set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)))
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index e3e142b1c5c5..cba92c90a62a 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -61,13 +61,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 #endif
 
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
 		+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
 }
 
 #define pmd_populate(mm, pmd, pte_page) \
-	pmd_populate_kernel(mm, pmd, page_address(pte_page))
+	pmd_populate_kernel(mm, pmd, page_address(pte_page), \
+			    (unsigned long)page_to_virt(pte_page))
 
 #endif
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index f876af56e13f..1cf3aae67023 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -390,7 +390,7 @@ static void __ref map_pages(unsigned long start_vaddr,
 				pg_table = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 				if (!pg_table)
 					panic("page table allocation failed\n");
-				pmd_populate_kernel(NULL, pmd, pg_table);
+				pmd_populate_kernel(NULL, pmd, pg_table, vaddr);
 			}
 
 			pg_table = pte_offset_kernel(pmd, vaddr);
@@ -481,7 +481,7 @@ void free_initmem(void)
 	/* finally dump all the instructions which were cached, since the
 	 * pages are no-longer executable */
 	flush_icache_range(init_begin, init_end);
-	
+
 	free_initmem_default(POISON_FREE_INITMEM);
 
 	/* set up a new led state on systems shipped LED State panel */
@@ -694,7 +694,7 @@ static void __init fixmap_init(void)
 		if (!pte)
 			panic("fixmap: pte allocation failed.\n");
 
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel(&init_mm, pmd, pte, addr);
 
 		addr += PAGE_SIZE;
 	} while (addr < end);
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index dc5c039eb28e..b85105158686 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -26,7 +26,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 /* #define pgd_populate(mm, pmd, pte)      BUG() */
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	*pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
 }
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index dd2cff53a111..061c4be60166 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -156,7 +156,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	*pmd = __pmd(__pgtable_ptr_val(pte) | PMD_VAL_BITS);
 }
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 11eac371e7e0..2788ce005b95 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -15,7 +15,7 @@
 /* #define pgd_populate(mm, pmd, pte)      BUG() */
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	if (IS_ENABLED(CONFIG_BOOKE))
 		*pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e50b211becb3..d069443b4014 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -37,7 +37,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	pmd_set(pmd, (unsigned long)pte);
 }
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 15e88f1439ec..a70063cd6f64 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -104,7 +104,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 	if (!pmd_present(*pmdp)) {
 		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
 						region_start, region_end);
-		pmd_populate_kernel(&init_mm, pmdp, ptep);
+		pmd_populate_kernel(&init_mm, pmdp, ptep, ea);
 	}
 	ptep = pte_offset_kernel(pmdp, ea);
 
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
index aa9aa11927b2..22df07fd1af5 100644
--- a/arch/powerpc/mm/kasan/init_32.c
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -47,7 +47,7 @@ int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_
 		if (!new)
 			return -ENOMEM;
 		kasan_populate_pte(new, PAGE_KERNEL);
-		pmd_populate_kernel(&init_mm, pmd, new);
+		pmd_populate_kernel(&init_mm, pmd, new, k_cur);
 	}
 	return 0;
 }
@@ -187,6 +187,6 @@ void __init kasan_early_init(void)
 
 	do {
 		next = pgd_addr_end(addr, end);
-		pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
+		pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte, addr);
 	} while (pmd++, addr = next, addr != end);
 }
diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c
index 11519e88dc6b..05ccdb88ff51 100644
--- a/arch/powerpc/mm/kasan/init_book3e_64.c
+++ b/arch/powerpc/mm/kasan/init_book3e_64.c
@@ -54,7 +54,7 @@ static int __init kasan_map_kernel_page(unsigned long ea, unsigned long pa, pgpr
 	if (kasan_pte_table(*pmdp)) {
 		ptep = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
 		memcpy(ptep, kasan_early_shadow_pte, PTE_TABLE_SIZE);
-		pmd_populate_kernel(&init_mm, pmdp, ptep);
+		pmd_populate_kernel(&init_mm, pmdp, ptep, ea);
 	}
 	ptep = pte_offset_kernel(pmdp, ea);
 
@@ -93,9 +93,12 @@ void __init kasan_early_init(void)
 		__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
 			     &kasan_early_shadow_pte[i], zero_pte, 0);
 
-	for (i = 0; i < PTRS_PER_PMD; i++)
+	addr = KASAN_SHADOW_START;
+	for (i = 0; i < PTRS_PER_PMD; i++) {
 		pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
-				    kasan_early_shadow_pte);
+				    kasan_early_shadow_pte, addr);
+		addr += PMD_SIZE;
+	}
 
 	for (i = 0; i < PTRS_PER_PUD; i++)
 		pud_populate(&init_mm, &kasan_early_shadow_pud[i],
diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c
index 9300d641cf9a..79569734dc29 100644
--- a/arch/powerpc/mm/kasan/init_book3s_64.c
+++ b/arch/powerpc/mm/kasan/init_book3s_64.c
@@ -55,6 +55,7 @@ void __init kasan_init(void)
 	phys_addr_t start, end;
 	u64 i;
 	pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL);
+	unsigned long vaddr_start = KASAN_SHADOW_START;
 
 	if (!early_radix_enabled()) {
 		pr_warn("KASAN not enabled as it requires radix!");
@@ -68,9 +69,11 @@ void __init kasan_init(void)
 		__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
 			     &kasan_early_shadow_pte[i], zero_pte, 0);
 
-	for (i = 0; i < PTRS_PER_PMD; i++)
+	for (i = 0; i < PTRS_PER_PMD; i++) {
 		pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
-				    kasan_early_shadow_pte);
+				    kasan_early_shadow_pte,
+				    vaddr_start + i * PMD_SIZE);
+	}
 
 	for (i = 0; i < PTRS_PER_PUD; i++)
 		pud_populate(&init_mm, &kasan_early_shadow_pud[i],
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index 1c5e4ecbebeb..930bdd7a3774 100644
--- a/arch/powerpc/mm/nohash/book3e_pgtable.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -107,7 +107,7 @@ int __ref map_kernel_page(unsigned long ea, phys_addr_t pa, pgprot_t prot)
 		pmdp = pmd_offset(pudp, ea);
 		if (!pmd_present(*pmdp)) {
 			ptep = early_alloc_pgtable(PTE_TABLE_SIZE);
-			pmd_populate_kernel(&init_mm, pmdp, ptep);
+			pmd_populate_kernel(&init_mm, pmdp, ptep, ea);
 		}
 		ptep = pte_offset_kernel(pmdp, ea);
 	}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index cfd622ebf774..e6fbaf3e9072 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ notrace void __init early_ioremap_init(void)
 
 	for (; (s32)(FIXADDR_TOP - addr) > 0;
 	     addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++)
-		pmd_populate_kernel(&init_mm, pmdp, ptep);
+		pmd_populate_kernel(&init_mm, pmdp, ptep, addr);
 
 	early_ioremap_setup();
 }
@@ -64,7 +64,7 @@ pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
 	if (pmd_none(*pmdp)) {
 		pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
 
-		pmd_populate_kernel(&init_mm, pmdp, ptep);
+		pmd_populate_kernel(&init_mm, pmdp, ptep, va);
 	}
 	return pte_offset_kernel(pmdp, va);
 }
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index deaf971253a2..d619daeded7f 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -16,7 +16,7 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
-	pmd_t *pmd, pte_t *pte)
+	pmd_t *pmd, pte_t *pte, unsigned long vaddr)
 {
 	unsigned long pfn = virt_to_pfn(pte);
 
diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
index 671b686c0158..085123ad4fa8 100644
--- a/arch/riscv/kernel/hibernate.c
+++ b/arch/riscv/kernel/hibernate.c
@@ -176,7 +176,7 @@ static int temp_pgtable_map_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long
 		if (!dst_ptep)
 			return -ENOMEM;
 
-		pmd_populate_kernel(NULL, dst_pmdp, dst_ptep);
+		pmd_populate_kernel(NULL, dst_pmdp, dst_ptep, 0);
 	}
 
 	dst_ptep = pte_offset_kernel(dst_pmdp, start);
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 7b84ef6dc4b6..4143b3f9d610 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -131,7 +131,7 @@ static inline void pmd_populate(struct mm_struct *mm,
 	set_pmd(pmd, __pmd(_SEGMENT_ENTRY | __pa(pte)));
 }
 
-#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) pmd_populate(mm, pmd, pte)
 
 /*
  * page table entry allocation/free routines.
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index 5d8577ab1591..04b29eb9712b 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -21,7 +21,7 @@ extern void pmd_free(struct mm_struct *mm, pmd_t *pmd);
 #endif
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((unsigned long)pte));
 }
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index bf1b54055316..c862572dbec8 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -157,7 +157,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 		if (!pte)
 			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 			      __func__, PAGE_SIZE, PAGE_SIZE);
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel(&init_mm, pmd, pte, 0);
 		BUG_ON(pte != pte_offset_kernel(pmd, 0));
 	}
 
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 4f73e87b22a3..558afcbd9016 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -53,7 +53,8 @@ static inline void free_pmd_fast(pmd_t * pmd)
 #define pmd_populate(mm, pmd, pte)	pmd_set(pmd, pte)
 
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
-#define pmd_populate_kernel		pmd_populate
+#define pmd_populate_kernel(mm, pmd, pte, vaddr)	\
+	pmd_populate(mm, pmd, pte)
 
 pgtable_t pte_alloc_one(struct mm_struct *mm);
 
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index caa7632be4c2..185ad9637442 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -69,8 +69,8 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 #define pte_free_defer pte_free_defer
 void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
 
-#define pmd_populate_kernel(MM, PMD, PTE)	pmd_set(MM, PMD, PTE)
-#define pmd_populate(MM, PMD, PTE)		pmd_set(MM, PMD, PTE)
+#define pmd_populate_kernel(MM, PMD, PTE, VADDR)	pmd_set(MM, PMD, PTE)
+#define pmd_populate(MM, PMD, PTE)			pmd_set(MM, PMD, PTE)
 
 void pgtable_free(void *table, bool is_page);
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 1ca9054d9b97..32b3c89f869d 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -5,7 +5,7 @@
  *  Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
  *  Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
- 
+
 #include <linux/extable.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -1843,7 +1843,7 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 			if (!new)
 				goto err_alloc;
 			alloc_bytes += PAGE_SIZE;
-			pmd_populate_kernel(&init_mm, pmd, new);
+			pmd_populate_kernel(&init_mm, pmd, new, vstart);
 		}
 
 		pte = pte_offset_kernel(pmd, vstart);
@@ -2404,11 +2404,11 @@ void __init paging_init(void)
 	 * work.
 	 */
 	init_mm.pgd += ((shift) / (sizeof(pgd_t)));
-	
+
 	memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
 
 	inherit_prom_mappings();
-	
+
 	/* Ok, we can use our TLB miss and window trap handlers safely.  */
 	setup_tba();
 
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index de5e31c64793..300431ff61bb 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* 
+/*
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Copyright 2003 PathScale, Inc.
  * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h
@@ -12,7 +12,7 @@
 
 #include <asm-generic/pgalloc.h>
 
-#define pmd_populate_kernel(mm, pmd, pte) \
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) \
 	set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
 
 #define pmd_populate(mm, pmd, pte) 				\
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index dcd836b59beb..3bc5e0cc7b38 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -62,7 +62,8 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
-				       pmd_t *pmd, pte_t *pte)
+				       pmd_t *pmd, pte_t *pte,
+				       unsigned long vaddr)
 {
 	paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
 	set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7e177856ee4f..ee4a73842466 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -73,7 +73,15 @@ static inline void fname##_init(struct mm_struct *mm,		\
 DEFINE_POPULATE(p4d_populate, p4d, pud, init)
 DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
 DEFINE_POPULATE(pud_populate, pud, pmd, init)
-DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+static inline void pmd_populate_kernel_init(struct mm_struct *mm,
+		pmd_t *arg1, pte_t *arg2, unsigned long arg3, bool init)
+{
+	if (init)
+		pmd_populate_kernel_safe(mm, arg1, arg2);
+	else
+		pmd_populate_kernel(mm, arg1, arg2, arg3);
+}
 
 #define DEFINE_ENTRY(type1, type2, init)			\
 static inline void set_##type1##_init(type1##_t *arg1,		\
@@ -286,7 +294,7 @@ static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
 {
 	if (pmd_none(*pmd)) {
 		pte_t *pte = (pte_t *) spp_getpage();
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel(&init_mm, pmd, pte, vaddr);
 		if (pte != pte_offset_kernel(pmd, 0))
 			printk(KERN_ERR "PAGETABLE BUG #03!\n");
 	}
@@ -575,7 +583,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pmd_populate_kernel_init(&init_mm, pmd, pte, init);
+		pmd_populate_kernel_init(&init_mm, pmd, pte, (unsigned long)__va(paddr), init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 	update_page_count(PG_LEVEL_2M, pages);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index aa7d279321ea..8844047fdaad 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -888,7 +888,7 @@ void __init early_ioremap_init(void)
 
 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	memset(bm_pte, 0, sizeof(bm_pte));
-	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+	pmd_populate_kernel(&init_mm, pmd, bm_pte, fix_to_virt(FIX_BTMAP_BEGIN));
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 9dddf19a5571..95ae9e12fe41 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -53,7 +53,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
 		}
 
 		p = early_alloc(PAGE_SIZE, nid, true);
-		pmd_populate_kernel(&init_mm, pmd, p);
+		pmd_populate_kernel(&init_mm, pmd, p, addr);
 	}
 
 	pte = pte_offset_kernel(pmd, addr);
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 7fc0f9126dd3..5359e4091b9a 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -21,7 +21,7 @@
  * inside the pgd, so has no extra memory associated with it.
  */
 
-#define pmd_populate_kernel(mm, pmdp, ptep)				     \
+#define pmd_populate_kernel(mm, pmdp, ptep, vaddr)			     \
 	(pmd_val(*(pmdp)) = ((unsigned long)ptep))
 #define pmd_populate(mm, pmdp, page)					     \
 	(pmd_val(*(pmdp)) = ((unsigned long)page_to_virt(page)))
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b0ee64225de..7162667c0e37 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2802,7 +2802,7 @@ static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
-int __pte_alloc_kernel(pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long vaddr);
 
 #if defined(CONFIG_MMU)
 
@@ -2997,7 +2997,7 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index da177e49d956..cfbe3695fffb 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -58,7 +58,7 @@ static int vmemmap_split_pmd(pmd_t *pmd, struct page *head, unsigned long start,
 	if (!pgtable)
 		return -ENOMEM;
 
-	pmd_populate_kernel(&init_mm, &__pmd, pgtable);
+	pmd_populate_kernel(&init_mm, &__pmd, pgtable, start);
 
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
 		pte_t entry, *pte;
@@ -81,7 +81,7 @@ static int vmemmap_split_pmd(pmd_t *pmd, struct page *head, unsigned long start,
 
 		/* Make pte visible before pmd. See comment in pmd_install(). */
 		smp_wmb();
-		pmd_populate_kernel(&init_mm, pmd, pgtable);
+		pmd_populate_kernel(&init_mm, pmd, pgtable, start);
 		if (!(walk->flags & VMEMMAP_SPLIT_NO_TLB_FLUSH))
 			flush_tlb_kernel_range(start, start + PMD_SIZE);
 	} else {
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index 89895f38f722..813f8e8a801c 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -117,7 +117,8 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 
 		if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
 			pmd_populate_kernel(&init_mm, pmd,
-					lm_alias(kasan_early_shadow_pte));
+					lm_alias(kasan_early_shadow_pte),
+					addr);
 			continue;
 		}
 
@@ -131,7 +132,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			if (!p)
 				return -ENOMEM;
 
-			pmd_populate_kernel(&init_mm, pmd, p);
+			pmd_populate_kernel(&init_mm, pmd, p, addr);
 		}
 		zero_pte_populate(pmd, addr, next);
 	} while (pmd++, addr = next, addr != end);
@@ -158,7 +159,8 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
 					lm_alias(kasan_early_shadow_pmd));
 			pmd = pmd_offset(pud, addr);
 			pmd_populate_kernel(&init_mm, pmd,
-					lm_alias(kasan_early_shadow_pte));
+					lm_alias(kasan_early_shadow_pte),
+					addr);
 			continue;
 		}
 
@@ -204,7 +206,8 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
 					lm_alias(kasan_early_shadow_pmd));
 			pmd = pmd_offset(pud, addr);
 			pmd_populate_kernel(&init_mm, pmd,
-					lm_alias(kasan_early_shadow_pte));
+					lm_alias(kasan_early_shadow_pte),
+					addr);
 			continue;
 		}
 
@@ -267,7 +270,8 @@ int __ref kasan_populate_early_shadow(const void *shadow_start,
 					lm_alias(kasan_early_shadow_pmd));
 			pmd = pmd_offset(pud, addr);
 			pmd_populate_kernel(&init_mm, pmd,
-					lm_alias(kasan_early_shadow_pte));
+					lm_alias(kasan_early_shadow_pte),
+					addr);
 			continue;
 		}
 
diff --git a/mm/memory.c b/mm/memory.c
index d2155ced45f8..67807ade9a0e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -447,7 +447,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd)
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long vaddr)
 {
 	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
@@ -456,7 +456,7 @@ int __pte_alloc_kernel(pmd_t *pmd)
 	spin_lock(&init_mm.page_table_lock);
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
 		smp_wmb(); /* See comment in pmd_install() */
-		pmd_populate_kernel(&init_mm, pmd, new);
+		pmd_populate_kernel(&init_mm, pmd, new, vaddr);
 		new = NULL;
 	}
 	spin_unlock(&init_mm.page_table_lock);
diff --git a/mm/percpu.c b/mm/percpu.c
index 4e11fc1e6def..fc83cf64baf6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3238,7 +3238,7 @@ void __init __weak pcpu_populate_pte(unsigned long addr)
 		new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
 		if (!new)
 			goto err_alloc;
-		pmd_populate_kernel(&init_mm, pmd, new);
+		pmd_populate_kernel(&init_mm, pmd, new, addr);
 	}
 
 	return;
diff --git a/mm/pgalloc-track.h b/mm/pgalloc-track.h
index e9e879de8649..ac983705a054 100644
--- a/mm/pgalloc-track.h
+++ b/mm/pgalloc-track.h
@@ -45,7 +45,8 @@ static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
 
 #define pte_alloc_kernel_track(pmd, address, mask)			\
 	((unlikely(pmd_none(*(pmd))) &&					\
-	  (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
+	  (__pte_alloc_kernel(pmd, address) ||				\
+	   ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
 		NULL: pte_offset_kernel(pmd, address))
 
 #endif /* _LINUX_PGALLOC_TRACK_H */
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a2cbe44c48e1..6085c8339b65 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -191,7 +191,7 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
 		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
 		if (!p)
 			return NULL;
-		pmd_populate_kernel(&init_mm, pmd, p);
+		pmd_populate_kernel(&init_mm, pmd, p, addr);
 	}
 	return pmd;
 }
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/5] arm64: dynamic enforcement of PXNTable
  2024-04-16 19:18 ` [PATCH 4/5 RESEND] arm64: dynamic enforcement of PXNTable Maxwell Bland
@ 2024-04-12 15:00   ` Maxwell Bland
  2024-04-17  6:37   ` [PATCH 4/5 RESEND] " kernel test robot
  1 sibling, 0 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-12 15:00 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Maxwell Bland, Catalin Marinas, Will Deacon, Ard Biesheuvel,
	Maxwell Bland, linux-kernel

PXNTable is enforced during init to ensure that regions of user memory
and kernel data cannot be executed from, preventing attacks that write
to writable kernel pages and then modify the kernel's page tables to
make that code executable. This patch extends the same protection to
dynamically allocated pages and page tables, so that all PMDs populated
outside of the module code region are PXNTable by default.
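
As a rough illustration (the example addresses below are hypothetical;
vaddr_is_data() is added by the hunk that follows), the classification
is intended to behave as:

	/* Sketch: vaddr_is_data() decides whether the new PMD gets PXNTable.
	 * module_code_va and vmalloc_data_va are illustrative addresses. */
	vaddr_is_data(module_code_va);   /* false: PTE table may hold code  */
	vaddr_is_data(vmalloc_data_va);  /* true:  PMD_TABLE_PXN is set     */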

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 arch/arm64/include/asm/pgalloc.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 5785272144e8..2376b4e7915c 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -12,6 +12,7 @@
 #include <asm/processor.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/module.h>
 
 #define __HAVE_ARCH_PGD_FREE
 #define __HAVE_ARCH_PUD_FREE
@@ -119,6 +120,12 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
 	set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
 }
 
+static inline bool vaddr_is_data(unsigned long vaddr)
+{
+	return ((vaddr + PMD_SIZE < MODULES_ASLR_START || vaddr >= MODULES_ASLR_END) &&
+		(vaddr + PMD_SIZE < (unsigned long) _text || vaddr >= (unsigned long) _etext));
+}
+
 /*
  * Populate the pmdp entry with a pointer to the pte.  This pmd is part
  * of the mm address space.
@@ -127,8 +134,11 @@ static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep,
 		    unsigned long vaddr)
 {
+	pmdval_t pmd = PMD_TYPE_TABLE | PMD_TABLE_UXN;
 	VM_BUG_ON(mm && mm != &init_mm);
-	__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
+	if (vaddr_is_data(vaddr))
+		pmd |= PMD_TABLE_PXN;
+	__pmd_populate(pmdp, __pa(ptep), pmd);
 }
 
 static inline void
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/5] ptdump: add state parameter for non-leaf callback
  2024-04-15 20:16 [PATCH 0/5] mm: code and data partitioning improvements Maxwell Bland
  2024-04-02 20:15 ` [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc Maxwell Bland
  2024-04-03 21:08 ` [PATCH 2/5] arm64: mm: code and data partitioning for aslr Maxwell Bland
@ 2024-04-15 19:51 ` Maxwell Bland
  2024-04-16 19:18   ` [PATCH 5/5 RESEND] " Maxwell Bland
  2024-04-16 20:11   ` [PATCH 5/5] " Andrew Morton
  2024-04-16 19:18 ` [PATCH 0/5 RESEND] mm: code and data partitioning improvements Maxwell Bland
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-15 19:51 UTC (permalink / raw)
  To: linux-mm
  Cc: Maxwell Bland, Catalin Marinas, Will Deacon, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Aneesh Kumar K.V,
	Naveen N. Rao, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Alexander Gordeev, Gerald Schaefer, Heiko Carstens,
	Vasily Gorbik, Christian Borntraeger, Sven Schnelle, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, x86, H. Peter Anvin, Andrew Morton,
	Ard Biesheuvel, Mark Rutland, Maxwell Bland, Alexandre Ghiti,
	Yu Chien Peter Lin, Song Shuai, linux-arm-kernel, linux-kernel,
	linuxppc-dev, linux-riscv, linux-s390

ptdump can now note non-leaf descriptor entries, a useful addition for
debugging table descriptor permissions when working on related code.
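
A dumper that wants table (non-leaf) entries reported opts in by
flipping the new flag; a minimal sketch mirroring the hunks below,
where every existing user keeps the default of false:

	.ptdump = {
		.note_page     = note_page,
		.range         = ptdump_range,
		.note_non_leaf = true,	/* also emit non-leaf descriptors */
	}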

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 arch/arm64/mm/ptdump.c          |  6 ++++--
 arch/powerpc/mm/ptdump/ptdump.c |  2 ++
 arch/riscv/mm/ptdump.c          |  6 ++++--
 arch/s390/mm/dump_pagetables.c  |  6 ++++--
 arch/x86/mm/dump_pagetables.c   |  3 ++-
 include/linux/ptdump.h          |  1 +
 mm/ptdump.c                     | 13 +++++++++++++
 7 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 796231a4fd63..1a6f4a3513e5 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -299,7 +299,8 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
 			.range = (struct ptdump_range[]){
 				{info->base_addr, end},
 				{0, 0}
-			}
+			},
+			.note_non_leaf = false
 		}
 	};
 
@@ -335,7 +336,8 @@ bool ptdump_check_wx(void)
 			.range = (struct ptdump_range[]) {
 				{_PAGE_OFFSET(vabits_actual), ~0UL},
 				{0, 0}
-			}
+			},
+			.note_non_leaf = false
 		}
 	};
 
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 9dc239967b77..89e673f5fd3d 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -307,6 +307,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 		.ptdump = {
 			.note_page = note_page,
 			.range = ptdump_range,
+			.note_non_leaf = false
 		}
 	};
 
@@ -340,6 +341,7 @@ bool ptdump_check_wx(void)
 		.ptdump = {
 			.note_page = note_page,
 			.range = ptdump_range,
+			.note_non_leaf = false
 		}
 	};
 
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 1289cc6d3700..b355633afcaf 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -328,7 +328,8 @@ static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
 			.range = (struct ptdump_range[]) {
 				{pinfo->base_addr, pinfo->end},
 				{0, 0}
-			}
+			},
+			.note_non_leaf = false
 		}
 	};
 
@@ -350,7 +351,8 @@ bool ptdump_check_wx(void)
 			.range = (struct ptdump_range[]) {
 				{KERN_VIRT_START, ULONG_MAX},
 				{0, 0}
-			}
+			},
+			.note_non_leaf = false
 		}
 	};
 
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ffd07ed7b4af..6468cfd53e2a 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -200,7 +200,8 @@ bool ptdump_check_wx(void)
 			.range = (struct ptdump_range[]) {
 				{.start = 0, .end = max_addr},
 				{.start = 0, .end = 0},
-			}
+			},
+			.note_non_leaf = false
 		},
 		.seq = NULL,
 		.level = -1,
@@ -239,7 +240,8 @@ static int ptdump_show(struct seq_file *m, void *v)
 			.range = (struct ptdump_range[]) {
 				{.start = 0, .end = max_addr},
 				{.start = 0, .end = 0},
-			}
+			},
+			.note_non_leaf = false
 		},
 		.seq = m,
 		.level = -1,
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 89079ea73e65..43f00dfb955f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -380,7 +380,8 @@ bool ptdump_walk_pgd_level_core(struct seq_file *m,
 		.ptdump = {
 			.note_page	= note_page,
 			.effective_prot = effective_prot,
-			.range		= ptdump_ranges
+			.range		= ptdump_ranges,
+			.note_non_leaf  = false
 		},
 		.level = -1,
 		.to_dmesg	= dmesg,
diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
index 8dbd51ea8626..b3e793a5c77f 100644
--- a/include/linux/ptdump.h
+++ b/include/linux/ptdump.h
@@ -16,6 +16,7 @@ struct ptdump_state {
 			  int level, u64 val);
 	void (*effective_prot)(struct ptdump_state *st, int level, u64 val);
 	const struct ptdump_range *range;
+	bool note_non_leaf;
 };
 
 bool ptdump_walk_pgd_level_core(struct seq_file *m,
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 106e1d66e9f9..97da7a765b22 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -41,6 +41,9 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
 	if (st->effective_prot)
 		st->effective_prot(st, 0, pgd_val(val));
 
+	if (st->note_non_leaf && !pgd_leaf(val))
+		st->note_page(st, addr, 0, pgd_val(val));
+
 	if (pgd_leaf(val)) {
 		st->note_page(st, addr, 0, pgd_val(val));
 		walk->action = ACTION_CONTINUE;
@@ -64,6 +67,9 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
 	if (st->effective_prot)
 		st->effective_prot(st, 1, p4d_val(val));
 
+	if (st->note_non_leaf && !p4d_leaf(val))
+		st->note_page(st, addr, 1, p4d_val(val));
+
 	if (p4d_leaf(val)) {
 		st->note_page(st, addr, 1, p4d_val(val));
 		walk->action = ACTION_CONTINUE;
@@ -87,6 +93,9 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
 	if (st->effective_prot)
 		st->effective_prot(st, 2, pud_val(val));
 
+	if (st->note_non_leaf && !pud_leaf(val))
+		st->note_page(st, addr, 2, pud_val(val));
+
 	if (pud_leaf(val)) {
 		st->note_page(st, addr, 2, pud_val(val));
 		walk->action = ACTION_CONTINUE;
@@ -108,6 +117,10 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 	if (st->effective_prot)
 		st->effective_prot(st, 3, pmd_val(val));
+
+	if (st->note_non_leaf && !pmd_leaf(val))
+		st->note_page(st, addr, 3, pmd_val(val));
+
 	if (pmd_leaf(val)) {
 		st->note_page(st, addr, 3, pmd_val(val));
 		walk->action = ACTION_CONTINUE;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 0/5] mm: code and data partitioning improvements
@ 2024-04-15 20:16 Maxwell Bland
  2024-04-02 20:15 ` [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc Maxwell Bland
                   ` (5 more replies)
  0 siblings, 6 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-15 20:16 UTC (permalink / raw)
  To: linux-mm
  Cc: Maxwell Bland, linux-kernel, linux-arm-kernel, linux-riscv,
	linuxppc-dev, Mark Rutland, Greg Kroah-Hartman,
	Christoph Hellwig, Christophe Leroy, David Hildenbrand,
	Conor Dooley

Managing allocations to ensure code and data pages are not interleaved
is not possible prior to this patch, as ASLR requires programming a
dynamic _text offset while the vmalloc infrastructure maintains static
VMALLOC_START and VMALLOC_END constants.

In systems where code and data are interleaved at a PTE granularity,
kernel improvements targeting the prevention of exploit stages which
modify page tables are inefficient and less effective as individual PTE
updates occur at high frequency and cannot be coarsely grouped at the
PMD level or greater.

This patch adds minimal arch-specific callbacks to the initialization
of vmalloc and to the decision of whether a given virtual memory area
may satisfy a vmalloc request, providing the capability to prevent the
allocation of specific virtual addresses under specific system states.
By default these hooks are no-ops.
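
The hooks follow the usual arch-override convention: the generic header
supplies a no-op fallback, and an architecture opts in by providing its
own definition plus the guard macro (condensed from patches 1 and 2):

	/* include/linux/vmalloc.h: generic no-op fallback */
	#ifndef arch_skip_va
	static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
	{
		return false;	/* never skip a candidate area */
	}
	#endif

	/* arch/arm64/include/asm/vmalloc.h: opt in to a real implementation */
	#define arch_skip_va arch_skip_va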

To further support the practical use of these callbacks, this patch also
adds a virtual address parameter to pmd_populate_kernel, so that this
interface matches the equivalent pte-level interface and architectures
are not required to perform a reverse page table lookup to determine the
vaddr being allocated during pmd creation.
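
Concretely, every caller changes from the old three-argument form to
also pass the virtual address being mapped (sketch; "addr" stands for
whatever VA the caller is populating, as in the mm/ hunks of patch 3):

	pmd_populate_kernel(&init_mm, pmd, pte);        /* before */
	pmd_populate_kernel(&init_mm, pmd, pte, addr);  /* after  */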

To demonstrate the impact and value of these changes, this patch
implements support for dynamic PXNTable under aarch64 in 71 lines of
code (a single "if" check during memory allocation), by checking the
virtual address of a given vmalloc call to determine whether it is code
or data. From experience in trying to implement kernel page table
immutability and protections in KVM to prevent recent CVEs, e.g.
CVE-2024-1086, this is a necessary first step.

To better help maintainers and future developers, this patch expands
ptdump.c so that non-leaf page table descriptors can be more easily
noted in debug output by setting a note_non_leaf bool in the ptdump
state.

Signed-off-by: Maxwell Bland <mbland@motorola.com>

---

First, thank you to a number of maintainers (Mark Rutland, Greg KH,
Christoph Hellwig, Christophe Leroy, David Hildenbrand, Conor Dooley)
for their feedback on

<20240220203256.31153-1-mbland@motorola.com>
and
<CAP5Mv+ydhk=Ob4b40ZahGMgT-5+-VEHxtmA=-LkJiEOOU+K6hw@mail.gmail.com>

This patch is a further refinement and overhaul of these prior two
attempts. Also, apologies for the roughly two-month delay between patch
submissions! I had Motorola work to do.

In support of testing this patch (but not included in this patch), I set
note_non_leaf to true under arch/arm64/mm/ptdump.c and added
PMD_TABLE_PXN to pte_bits to print out whether the PXNTable bit was set.
The txt files under the following directory can be diff'ed to see the
result:

github.com/maxwell-bland/linux-patch-data/tree/main/code_data_parting/ptdump
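
For reference, the pte_bits entry used for that testing is not part of
the series; reconstructed from the description above, it presumably
looked something like:

	{
		.mask	= PMD_TABLE_PXN,
		.val	= PMD_TABLE_PXN,
		.set	= "PXNTable",
		.clear	= "",
	},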

I also created a script to fetch and cross-compile the kernel for each
of the 21 subarchitectures which required fixes to provide a virtual
address to pmd_populate_kernel. I do not know whether a similar tool
already exists, but it worked better for me than some alternatives
(xcross, buildroot):

github.com/maxwell-bland/x-linux

As with the last patchset, I also measured performance using Torvalds'
test-tlb program on an aarch64 QEMU instance, with results here:

github.com/maxwell-bland/linux-patch-data/tree/main/code_data_parting/tlbperf

As all changes to other arches are effectively no-ops, performance
impacts in those domains are negligible. 

Maxwell Bland (5):
  mm: allow arch refinement/skip for vmap alloc
  arm64: mm: code and data partitioning for aslr
  mm: add vaddr param to pmd_populate_kernel
  arm64: dynamic enforcement of PXNTable
  ptdump: add state parameter for non-leaf callback

 arch/alpha/include/asm/pgalloc.h             |  5 +-
 arch/arc/include/asm/pgalloc.h               |  3 +-
 arch/arc/mm/highmem.c                        |  2 +-
 arch/arm/include/asm/kfence.h                |  2 +-
 arch/arm/include/asm/pgalloc.h               |  3 +-
 arch/arm/mm/kasan_init.c                     |  2 +-
 arch/arm/mm/mmu.c                            |  2 +-
 arch/arm64/include/asm/module.h              | 12 ++++
 arch/arm64/include/asm/pgalloc.h             | 15 ++++-
 arch/arm64/include/asm/vmalloc.h             | 17 ++++-
 arch/arm64/kernel/Makefile                   |  2 +-
 arch/arm64/kernel/module.c                   |  7 +-
 arch/arm64/kernel/probes/kprobes.c           |  7 +-
 arch/arm64/kernel/setup.c                    |  4 ++
 arch/arm64/kernel/vmalloc.c                  | 71 ++++++++++++++++++++
 arch/arm64/mm/ptdump.c                       | 10 +--
 arch/arm64/mm/trans_pgd.c                    |  2 +-
 arch/arm64/net/bpf_jit_comp.c                |  8 ++-
 arch/csky/include/asm/pgalloc.h              |  2 +-
 arch/hexagon/include/asm/pgalloc.h           |  2 +-
 arch/loongarch/include/asm/pgalloc.h         |  3 +-
 arch/loongarch/mm/init.c                     |  2 +-
 arch/loongarch/mm/kasan_init.c               |  2 +-
 arch/m68k/include/asm/mcf_pgalloc.h          |  2 +-
 arch/m68k/include/asm/motorola_pgalloc.h     |  3 +-
 arch/m68k/include/asm/sun3_pgalloc.h         |  3 +-
 arch/microblaze/include/asm/pgalloc.h        |  2 +-
 arch/mips/include/asm/pgalloc.h              |  2 +-
 arch/mips/kvm/mmu.c                          |  2 +-
 arch/nios2/include/asm/pgalloc.h             |  2 +-
 arch/openrisc/include/asm/pgalloc.h          |  2 +-
 arch/parisc/include/asm/pgalloc.h            |  5 +-
 arch/parisc/mm/init.c                        |  6 +-
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/book3s/64/pgalloc.h |  2 +-
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  2 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c     |  2 +-
 arch/powerpc/mm/kasan/init_32.c              |  4 +-
 arch/powerpc/mm/kasan/init_book3e_64.c       |  9 ++-
 arch/powerpc/mm/kasan/init_book3s_64.c       |  7 +-
 arch/powerpc/mm/nohash/book3e_pgtable.c      |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 +-
 arch/powerpc/mm/ptdump/ptdump.c              |  2 +
 arch/riscv/include/asm/pgalloc.h             |  2 +-
 arch/riscv/kernel/hibernate.c                |  2 +-
 arch/riscv/mm/ptdump.c                       |  6 +-
 arch/s390/include/asm/pgalloc.h              |  2 +-
 arch/s390/mm/dump_pagetables.c               |  6 +-
 arch/sh/include/asm/pgalloc.h                |  2 +-
 arch/sh/mm/init.c                            |  2 +-
 arch/sparc/include/asm/pgalloc_32.h          |  3 +-
 arch/sparc/include/asm/pgalloc_64.h          |  4 +-
 arch/sparc/mm/init_64.c                      |  8 +--
 arch/um/include/asm/pgalloc.h                |  4 +-
 arch/x86/include/asm/pgalloc.h               |  3 +-
 arch/x86/mm/dump_pagetables.c                |  3 +-
 arch/x86/mm/init_64.c                        | 14 +++-
 arch/x86/mm/ioremap.c                        |  2 +-
 arch/x86/mm/kasan_init_64.c                  |  2 +-
 arch/xtensa/include/asm/pgalloc.h            |  2 +-
 include/linux/mm.h                           |  4 +-
 include/linux/ptdump.h                       |  1 +
 include/linux/vmalloc.h                      | 24 +++++++
 mm/hugetlb_vmemmap.c                         |  4 +-
 mm/kasan/init.c                              | 14 ++--
 mm/memory.c                                  |  4 +-
 mm/percpu.c                                  |  2 +-
 mm/pgalloc-track.h                           |  3 +-
 mm/ptdump.c                                  | 13 ++++
 mm/sparse-vmemmap.c                          |  2 +-
 mm/vmalloc.c                                 | 16 +++--
 72 files changed, 299 insertions(+), 107 deletions(-)
 create mode 100644 arch/arm64/kernel/vmalloc.c


base-commit: 0bbac3facb5d6cc0171c45c9873a2dc96bea9680
-- 
2.39.2


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 0/5 RESEND] mm: code and data partitioning improvements
  2024-04-15 20:16 [PATCH 0/5] mm: code and data partitioning improvements Maxwell Bland
                   ` (2 preceding siblings ...)
  2024-04-15 19:51 ` [PATCH 5/5] ptdump: add state parameter for non-leaf callback Maxwell Bland
@ 2024-04-16 19:18 ` Maxwell Bland
  2024-04-16 19:18 ` [PATCH 3/5 RESEND] mm: add vaddr param to pmd_populate_kernel Maxwell Bland
  2024-04-16 19:18 ` [PATCH 4/5 RESEND] arm64: dynamic enforcement of PXNTable Maxwell Bland
  5 siblings, 0 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-16 19:18 UTC (permalink / raw)
  To: linux-mm
  Cc: Maxwell Bland, linux-kernel, linux-arm-kernel, linux-riscv,
	linuxppc-dev, Mark Rutland, Greg Kroah-Hartman,
	Christoph Hellwig, Christophe Leroy, David Hildenbrand,
	Conor Dooley

Managing allocations to ensure code and data pages are not interleaved
is not possible prior to this patch, as ASLR requires programming a
dynamic _text offset while the vmalloc infrastructure maintains static
VMALLOC_START and VMALLOC_END constants.

In systems where code and data are interleaved at a PTE granularity,
kernel improvements targeting the prevention of exploit stages which
modify page tables are inefficient and less effective as individual PTE
updates occur at high frequency and cannot be coarsely grouped at the
PMD level or greater.

This patch adds minimal arch-specific callbacks to the initialization
of vmalloc and to the decision of whether a given virtual memory area
may satisfy a vmalloc request, providing the capability to prevent the
allocation of specific virtual addresses under specific system states.
By default these hooks are no-ops.

To further support the practical use of these callbacks, this patch also
adds a virtual address parameter to pmd_populate_kernel, so that this
interface matches the equivalent pte-level interface and architectures
are not required to perform a reverse page table lookup to determine the
vaddr being allocated during pmd creation.

To demonstrate the impact and value of these changes, this patch
implements support for dynamic PXNTable under aarch64 in 71 lines of
code (a single "if" check during memory allocation), by checking the
virtual address of a given vmalloc call to determine whether it is code
or data. From experience in trying to implement kernel page table
immutability and protections in KVM to prevent recent CVEs, e.g.
CVE-2024-1086, this is a necessary first step.

To better help maintainers and future developers, this patch expands
ptdump.c so that non-leaf page table descriptors can be more easily
noted in debug output by setting a note_non_leaf bool in the ptdump
state.

Signed-off-by: Maxwell Bland <mbland@motorola.com>

---

Zero-eth, apologies for the triple mail of these patches. I am in the process
of setting up a new SMTP/mail server for Motorola, but until then I've needed
to script the raw SMTP in order to send appropriately formatted patch emails.

First, thank you to a number of maintainers (Mark Rutland, Greg KH,
Christoph Hellwig, Christophe Leroy, David Hildenbrand, Conor Dooley)
for their feedback on

<20240220203256.31153-1-mbland@motorola.com>
and
<CAP5Mv+ydhk=Ob4b40ZahGMgT-5+-VEHxtmA=-LkJiEOOU+K6hw@mail.gmail.com>

This patch is a further refinement and overhaul of these prior two
attempts. Also, apologies for the roughly two-month delay between patch
submissions! I had Motorola work to do.

In support of testing this patch (but not included in this patch), I set
note_non_leaf to true under arch/arm64/mm/ptdump.c and added
PMD_TABLE_PXN to pte_bits to print out whether the PXNTable bit was set.
The txt files under the following directory can be diff'ed to see the
result:

github.com/maxwell-bland/linux-patch-data/tree/main/code_data_parting/ptdump

I also created a script to fetch and cross-compile the kernel for each
of the 21 subarchitectures which required fixes to provide a virtual
address to pmd_populate_kernel. I do not know whether a similar tool
already exists, but it worked better for me than some alternatives
(xcross, buildroot):

github.com/maxwell-bland/x-linux

As with the last patchset, I also measured performance using Torvalds'
test-tlb program on an aarch64 QEMU instance, with results here:

github.com/maxwell-bland/linux-patch-data/tree/main/code_data_parting/tlbperf

As all changes to other arches are effectively no-ops, performance
impacts in those domains are negligible. 

Maxwell Bland (5):
  mm: allow arch refinement/skip for vmap alloc
  arm64: mm: code and data partitioning for aslr
  mm: add vaddr param to pmd_populate_kernel
  arm64: dynamic enforcement of PXNTable
  ptdump: add state parameter for non-leaf callback

 arch/alpha/include/asm/pgalloc.h             |  5 +-
 arch/arc/include/asm/pgalloc.h               |  3 +-
 arch/arc/mm/highmem.c                        |  2 +-
 arch/arm/include/asm/kfence.h                |  2 +-
 arch/arm/include/asm/pgalloc.h               |  3 +-
 arch/arm/mm/kasan_init.c                     |  2 +-
 arch/arm/mm/mmu.c                            |  2 +-
 arch/arm64/include/asm/module.h              | 12 ++++
 arch/arm64/include/asm/pgalloc.h             | 15 ++++-
 arch/arm64/include/asm/vmalloc.h             | 17 ++++-
 arch/arm64/kernel/Makefile                   |  2 +-
 arch/arm64/kernel/module.c                   |  7 +-
 arch/arm64/kernel/probes/kprobes.c           |  7 +-
 arch/arm64/kernel/setup.c                    |  4 ++
 arch/arm64/kernel/vmalloc.c                  | 71 ++++++++++++++++++++
 arch/arm64/mm/ptdump.c                       | 10 +--
 arch/arm64/mm/trans_pgd.c                    |  2 +-
 arch/arm64/net/bpf_jit_comp.c                |  8 ++-
 arch/csky/include/asm/pgalloc.h              |  2 +-
 arch/hexagon/include/asm/pgalloc.h           |  2 +-
 arch/loongarch/include/asm/pgalloc.h         |  3 +-
 arch/loongarch/mm/init.c                     |  2 +-
 arch/loongarch/mm/kasan_init.c               |  2 +-
 arch/m68k/include/asm/mcf_pgalloc.h          |  2 +-
 arch/m68k/include/asm/motorola_pgalloc.h     |  3 +-
 arch/m68k/include/asm/sun3_pgalloc.h         |  3 +-
 arch/microblaze/include/asm/pgalloc.h        |  2 +-
 arch/mips/include/asm/pgalloc.h              |  2 +-
 arch/mips/kvm/mmu.c                          |  2 +-
 arch/nios2/include/asm/pgalloc.h             |  2 +-
 arch/openrisc/include/asm/pgalloc.h          |  2 +-
 arch/parisc/include/asm/pgalloc.h            |  5 +-
 arch/parisc/mm/init.c                        |  6 +-
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/book3s/64/pgalloc.h |  2 +-
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  2 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c     |  2 +-
 arch/powerpc/mm/kasan/init_32.c              |  4 +-
 arch/powerpc/mm/kasan/init_book3e_64.c       |  9 ++-
 arch/powerpc/mm/kasan/init_book3s_64.c       |  7 +-
 arch/powerpc/mm/nohash/book3e_pgtable.c      |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 +-
 arch/powerpc/mm/ptdump/ptdump.c              |  2 +
 arch/riscv/include/asm/pgalloc.h             |  2 +-
 arch/riscv/kernel/hibernate.c                |  2 +-
 arch/riscv/mm/ptdump.c                       |  6 +-
 arch/s390/include/asm/pgalloc.h              |  2 +-
 arch/s390/mm/dump_pagetables.c               |  6 +-
 arch/sh/include/asm/pgalloc.h                |  2 +-
 arch/sh/mm/init.c                            |  2 +-
 arch/sparc/include/asm/pgalloc_32.h          |  3 +-
 arch/sparc/include/asm/pgalloc_64.h          |  4 +-
 arch/sparc/mm/init_64.c                      |  8 +--
 arch/um/include/asm/pgalloc.h                |  4 +-
 arch/x86/include/asm/pgalloc.h               |  3 +-
 arch/x86/mm/dump_pagetables.c                |  3 +-
 arch/x86/mm/init_64.c                        | 14 +++-
 arch/x86/mm/ioremap.c                        |  2 +-
 arch/x86/mm/kasan_init_64.c                  |  2 +-
 arch/xtensa/include/asm/pgalloc.h            |  2 +-
 include/linux/mm.h                           |  4 +-
 include/linux/ptdump.h                       |  1 +
 include/linux/vmalloc.h                      | 24 +++++++
 mm/hugetlb_vmemmap.c                         |  4 +-
 mm/kasan/init.c                              | 14 ++--
 mm/memory.c                                  |  4 +-
 mm/percpu.c                                  |  2 +-
 mm/pgalloc-track.h                           |  3 +-
 mm/ptdump.c                                  | 13 ++++
 mm/sparse-vmemmap.c                          |  2 +-
 mm/vmalloc.c                                 | 16 +++--
 72 files changed, 299 insertions(+), 107 deletions(-)
 create mode 100644 arch/arm64/kernel/vmalloc.c


base-commit: 0bbac3facb5d6cc0171c45c9873a2dc96bea9680
-- 
2.39.2


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/5 RESEND] mm: allow arch refinement/skip for vmap alloc
  2024-04-02 20:15 ` [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc Maxwell Bland
@ 2024-04-16 19:18   ` Maxwell Bland
  2024-04-18  8:55   ` [PATCH 1/5] " Uladzislau Rezki
  1 sibling, 0 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-16 19:18 UTC (permalink / raw)
  To: linux-mm
  Cc: Maxwell Bland, linux-kernel, Andrew Morton, Uladzislau Rezki,
	Christoph Hellwig, Mark Rutland, Greg Kroah-Hartman,
	Christoph Hellwig, Christophe Leroy, David Hildenbrand,
	Conor Dooley, Lorenzo Stoakes

Makes red-black tree allocation more flexible on a per-architecture
basis by introducing optional hooks to refine the red-black tree
structure and by exposing vmalloc functions for clipping vmap areas,
finding vmap areas, and inserting vmap areas.

With this patch, the red-black vmap tree can be refined to account for
architecture-specific memory management operations, most notably address
space layout randomization, as these features conflict with generic
management of a single vmalloc_start to vmalloc_end range as given by
mm/vmalloc.c.

For example, x86 is forced to restrict ASLR to 1024 possible locations,
a very small number, and arm64 breaks standard code/data partitioning
altogether, which prevents the enforcement of performant immutability
on kernel page tables.

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 include/linux/vmalloc.h | 24 ++++++++++++++++++++++++
 mm/vmalloc.c            | 16 ++++++++++------
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 98ea90e90439..3c5ce7ee0bea 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -12,6 +12,7 @@
 
 #include <asm/vmalloc.h>
 
+struct kmem_cache;
 struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
 struct notifier_block;		/* in notifier.h */
 struct iov_iter;		/* in uio.h */
@@ -125,6 +126,21 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
 }
 #endif
 
+#ifndef arch_skip_va
+static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
+{
+	return false;
+}
+#endif
+
+#ifndef arch_refine_vmap_space
+static inline void arch_refine_vmap_space(struct rb_root *root,
+					  struct list_head *head,
+					  struct kmem_cache *cachep)
+{
+}
+#endif
+
 /*
  *	Highlevel APIs for driver use
  */
@@ -214,6 +230,14 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
 void free_vm_area(struct vm_struct *area);
 extern struct vm_struct *remove_vm_area(const void *addr);
 extern struct vm_struct *find_vm_area(const void *addr);
+extern void insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
+				     struct rb_root *root,
+				     struct list_head *head);
+extern int va_clip(struct rb_root *root, struct list_head *head,
+		   struct vmap_area *va, unsigned long nva_start_addr,
+		   unsigned long size);
+extern struct vmap_area *__find_vmap_area(unsigned long addr,
+					  struct rb_root *root);
 struct vmap_area *find_vmap_area(unsigned long addr);
 
 static inline bool is_vm_area_hugepages(const void *addr)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 68fa001648cc..de4577a3708e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -989,7 +989,7 @@ unsigned long vmalloc_nr_pages(void)
 	return atomic_long_read(&nr_vmalloc_pages);
 }
 
-static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
+struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
 {
 	struct rb_node *n = root->rb_node;
 
@@ -1322,7 +1322,7 @@ insert_vmap_area(struct vmap_area *va,
 		link_va(va, root, parent, link, head);
 }
 
-static void
+void
 insert_vmap_area_augment(struct vmap_area *va,
 	struct rb_node *from, struct rb_root *root,
 	struct list_head *head)
@@ -1501,7 +1501,7 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
 				vstart < va->va_start) {
 			node = node->rb_left;
 		} else {
-			if (is_within_this_va(va, size, align, vstart))
+			if (!arch_skip_va(va, vstart) && is_within_this_va(va, size, align, vstart))
 				return va;
 
 			/*
@@ -1522,7 +1522,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
 			 */
 			while ((node = rb_parent(node))) {
 				va = rb_entry(node, struct vmap_area, rb_node);
-				if (is_within_this_va(va, size, align, vstart))
+				if (!arch_skip_va(va, vstart) &&
+				    is_within_this_va(va, size, align, vstart))
 					return va;
 
 				if (get_subtree_max_size(node->rb_right) >= length &&
@@ -1554,7 +1555,7 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
 	struct vmap_area *va;
 
 	list_for_each_entry(va, head, list) {
-		if (!is_within_this_va(va, size, align, vstart))
+		if (arch_skip_va(va, vstart) || !is_within_this_va(va, size, align, vstart))
 			continue;
 
 		return va;
@@ -1617,7 +1618,7 @@ classify_va_fit_type(struct vmap_area *va,
 	return type;
 }
 
-static __always_inline int
+__always_inline int
 va_clip(struct rb_root *root, struct list_head *head,
 		struct vmap_area *va, unsigned long nva_start_addr,
 		unsigned long size)
@@ -5129,4 +5130,7 @@ void __init vmalloc_init(void)
 	vmap_node_shrinker->count_objects = vmap_node_shrink_count;
 	vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
 	shrinker_register(vmap_node_shrinker);
+
+	arch_refine_vmap_space(&free_vmap_area_root, &free_vmap_area_list,
+			       vmap_area_cachep);
 }
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/5 RESEND] arm64: mm: code and data partitioning for aslr
  2024-04-03 21:08 ` [PATCH 2/5] arm64: mm: code and data partitioning for aslr Maxwell Bland
@ 2024-04-16 19:18   ` Maxwell Bland
  2024-04-17  5:14   ` [PATCH 2/5] " kernel test robot
  1 sibling, 0 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-16 19:18 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Maxwell Bland, linux-kernel, Catalin Marinas, Will Deacon,
	Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Zi Shen Lim, Mark Rutland, Ard Biesheuvel, Maxwell Bland,
	Kees Cook, Sami Tolvanen, Baoquan He, Jonathan Cameron,
	Greg Kroah-Hartman, Ryo Takakura, James Morse, Christophe Leroy,
	Mark Rutland, Greg Kroah-Hartman, Christoph Hellwig,
	Christophe Leroy, David Hildenbrand, Conor Dooley, bpf

Uses hooks in the vmalloc infrastructure to prevent interleaving of
code and data pages. This both maintains the management assumptions
made by non-arch-specific code and makes management of these regions
more precise, allowing, for example, PXNTable bits to be maintained on
dynamically allocated memory and certain page middle directory and
higher-level descriptors to be kept immutable.
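
A hedged sketch of the resulting allocator behavior (the callers and
addresses are illustrative; the real check is arch_skip_va() in
arch/arm64/kernel/vmalloc.c below):

	/* A generic vmalloc() search starts at VMALLOC_START, so any free
	 * area inside the module window is skipped; module_alloc() passes
	 * vstart == MODULES_ASLR_START and is allowed to allocate there. */
	arch_skip_va(va, VMALLOC_START);      /* true if va is in the window */
	arch_skip_va(va, MODULES_ASLR_START); /* false: explicit code request */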

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 arch/arm64/include/asm/module.h    | 12 +++++
 arch/arm64/include/asm/vmalloc.h   | 17 ++++++-
 arch/arm64/kernel/Makefile         |  2 +-
 arch/arm64/kernel/module.c         |  7 ++-
 arch/arm64/kernel/probes/kprobes.c |  7 +--
 arch/arm64/kernel/setup.c          |  4 ++
 arch/arm64/kernel/vmalloc.c        | 71 ++++++++++++++++++++++++++++++
 arch/arm64/mm/ptdump.c             |  4 +-
 arch/arm64/net/bpf_jit_comp.c      |  8 ++--
 9 files changed, 117 insertions(+), 15 deletions(-)
 create mode 100644 arch/arm64/kernel/vmalloc.c

diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 79550b22ba19..e50d7a240ad7 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -65,4 +65,16 @@ static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
 	return NULL;
 }
 
+extern u64 module_direct_base __ro_after_init;
+extern u64 module_plt_base __ro_after_init;
+
+int __init module_init_limits(void);
+
+#define MODULES_ASLR_START ((module_plt_base) ? module_plt_base : \
+		module_direct_base)
+#define MODULES_ASLR_END ((module_plt_base) ? module_plt_base + SZ_2G : \
+		module_direct_base + SZ_128M)
+
+void *module_alloc(unsigned long size);
+
 #endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 38fafffe699f..93f8f1e2b1ce 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -4,6 +4,9 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
+struct vmap_area;
+struct kmem_cache;
+
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 
 #define arch_vmap_pud_supported arch_vmap_pud_supported
@@ -23,7 +26,7 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
 	return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
 }
 
-#endif
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
 
 #define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged
 static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
@@ -31,4 +34,16 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
 	return pgprot_tagged(prot);
 }
 
+#ifdef CONFIG_RANDOMIZE_BASE
+
+#define arch_skip_va arch_skip_va
+inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart);
+
+#define arch_refine_vmap_space arch_refine_vmap_space
+inline void arch_refine_vmap_space(struct rb_root *root,
+					  struct list_head *head,
+					  struct kmem_cache *cachep);
+
+#endif /* CONFIG_RANDOMIZE_BASE */
+
 #endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 763824963ed1..4298a2168544 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -56,7 +56,7 @@ obj-$(CONFIG_ACPI)			+= acpi.o
 obj-$(CONFIG_ACPI_NUMA)			+= acpi_numa.o
 obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 obj-$(CONFIG_PARAVIRT)			+= paravirt.o
-obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
+obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o vmalloc.o
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 obj-$(CONFIG_ELF_CORE)			+= elfcore.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 47e0be610bb6..58329b27624d 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -26,8 +26,8 @@
 #include <asm/scs.h>
 #include <asm/sections.h>
 
-static u64 module_direct_base __ro_after_init = 0;
-static u64 module_plt_base __ro_after_init = 0;
+u64 module_direct_base __ro_after_init;
+u64 module_plt_base __ro_after_init;
 
 /*
  * Choose a random page-aligned base address for a window of 'size' bytes which
@@ -66,7 +66,7 @@ static u64 __init random_bounding_box(u64 size, u64 start, u64 end)
  * we may fall back to PLTs where they could have been avoided, but this keeps
  * the logic significantly simpler.
  */
-static int __init module_init_limits(void)
+int __init module_init_limits(void)
 {
 	u64 kernel_end = (u64)_end;
 	u64 kernel_start = (u64)_text;
@@ -108,7 +108,6 @@ static int __init module_init_limits(void)
 
 	return 0;
 }
-subsys_initcall(module_init_limits);
 
 void *module_alloc(unsigned long size)
 {
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 327855a11df2..89968f05177f 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -131,9 +131,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 
 void *alloc_insn_page(void)
 {
-	return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
-			GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
-			NUMA_NO_NODE, __builtin_return_address(0));
+	return __vmalloc_node_range(PAGE_SIZE, 1, MODULES_ASLR_START,
+			MODULES_ASLR_END, GFP_KERNEL, PAGE_KERNEL_ROX,
+			VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+			__builtin_return_address(0));
 }
 
 /* arm kprobe: install breakpoint in text */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 65a052bf741f..908ee0ccc606 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -53,6 +53,7 @@
 #include <asm/efi.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/mmu_context.h>
+#include <asm/module.h>
 
 static int num_standard_resources;
 static struct resource *standard_resources;
@@ -321,6 +322,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 
 	arm64_memblock_init();
 
+
 	paging_init();
 
 	acpi_table_upgrade();
@@ -366,6 +368,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 			"This indicates a broken bootloader or old kernel\n",
 			boot_args[1], boot_args[2], boot_args[3]);
 	}
+
+	module_init_limits();
 }
 
 static inline bool cpu_can_disable(unsigned int cpu)
diff --git a/arch/arm64/kernel/vmalloc.c b/arch/arm64/kernel/vmalloc.c
new file mode 100644
index 000000000000..00a463f3692f
--- /dev/null
+++ b/arch/arm64/kernel/vmalloc.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AArch64 vmap area management code
+ *
+ * Author: Maxwell Bland <mbland@motorola.com>
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/elf.h>
+
+#include <asm/module.h>
+
+/*
+ * Prevents the allocation of new vmap areas in the dynamic code
+ * region unless the requested start address is explicitly the
+ * module region.
+ */
+inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
+{
+	return (vstart != MODULES_ASLR_START &&
+			va->va_start >= MODULES_ASLR_START &&
+			va->va_end <= MODULES_ASLR_END);
+}
+
+/*
+ * Splits a vmap area in two and allocates a new area if needed
+ */
+inline struct vmap_area *
+try_split_alloc_vmap_area(struct rb_root *root,
+		struct list_head *head,
+		struct kmem_cache *vmap_area_cachep,
+		unsigned long addr)
+{
+	struct vmap_area *va;
+	int ret;
+	struct vmap_area *lva = NULL;
+
+	va = __find_vmap_area(addr, root);
+	if (!va) {
+		pr_err("%s: could not find vmap\n", __func__);
+		return NULL;
+	}
+
+	lva = kmem_cache_alloc(vmap_area_cachep, GFP_NOWAIT);
+	if (!lva) {
+		pr_err("%s: unable to allocate va for range\n", __func__);
+		return NULL;
+	}
+	lva->va_start = addr;
+	lva->va_end = va->va_end;
+	ret = va_clip(root, head, va, addr, va->va_end - addr);
+	if (WARN_ON_ONCE(ret)) {
+		pr_err("%s: unable to clip code base region\n", __func__);
+		kmem_cache_free(vmap_area_cachep, lva);
+		return NULL;
+	}
+	insert_vmap_area_augment(lva, NULL, root, head);
+	return lva;
+}
+
+/*
+ * Runs during vmalloc_init to ensure that explicit rb tree node
+ * delineations exist between code and data regions
+ */
+inline void arch_refine_vmap_space(struct rb_root *root,
+		struct list_head *head,
+		struct kmem_cache *cachep)
+{
+	try_split_alloc_vmap_area(root, head, cachep, MODULES_ASLR_START);
+	try_split_alloc_vmap_area(root, head, cachep, MODULES_ASLR_END);
+}
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 6986827e0d64..796231a4fd63 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -261,9 +261,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
 		}
 		pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
 				   pg_level[st->level].name);
-		if (st->current_prot && pg_level[st->level].bits)
-			dump_prot(st, pg_level[st->level].bits,
-				  pg_level[st->level].num);
+		dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num);
 		pt_dump_seq_puts(st->seq, "\n");
 
 		if (addr >= st->marker[1].start_address) {
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 122021f9bdfc..6ed6e00b8b4a 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -13,6 +13,8 @@
 #include <linux/memory.h>
 #include <linux/printk.h>
 #include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/moduleloader.h>
 
 #include <asm/asm-extable.h>
 #include <asm/byteorder.h>
@@ -1790,18 +1792,18 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len)
 
 u64 bpf_jit_alloc_exec_limit(void)
 {
-	return VMALLOC_END - VMALLOC_START;
+	return MODULES_ASLR_END - MODULES_ASLR_START;
 }
 
 void *bpf_jit_alloc_exec(unsigned long size)
 {
 	/* Memory is intended to be executable, reset the pointer tag. */
-	return kasan_reset_tag(vmalloc(size));
+	return kasan_reset_tag(module_alloc(size));
 }
 
 void bpf_jit_free_exec(void *addr)
 {
-	return vfree(addr);
+	return module_memfree(addr);
 }
 
 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/5 RESEND] mm: add vaddr param to pmd_populate_kernel
  2024-04-15 20:16 [PATCH 0/5] mm: code and data partitioning improvements Maxwell Bland
                   ` (3 preceding siblings ...)
  2024-04-16 19:18 ` [PATCH 0/5 RESEND] mm: code and data partitioning improvements Maxwell Bland
@ 2024-04-16 19:18 ` Maxwell Bland
  2024-04-05 18:37   ` [PATCH 3/5] " Maxwell Bland
  2024-04-16 19:18 ` [PATCH 4/5 RESEND] arm64: dynamic enforcement of PXNTable Maxwell Bland
  5 siblings, 1 reply; 18+ messages in thread
From: Maxwell Bland @ 2024-04-16 19:18 UTC (permalink / raw)
  To: linux-mm
  Cc: Maxwell Bland, Richard Henderson, Ivan Kokshaysky, Matt Turner,
	Vineet Gupta, Alexander Potapenko, Marco Elver, Dmitry Vyukov,
	Russell King, Andrey Ryabinin, Andrey Konovalov,
	Vincenzo Frascino, Catalin Marinas, Will Deacon, Guo Ren,
	Brian Cain, Huacai Chen, WANG Xuerui, Geert Uytterhoeven,
	Sam Creasey, Michal Simek, Thomas Bogendoerfer, Dinh Nguyen,
	Jonas Bonn, Stefan Kristiansson, Stafford Horne,
	James E.J. Bottomley, Helge Deller, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Aneesh Kumar K.V,
	Naveen N. Rao, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Heiko Carstens, Vasily Gorbik, Alexander Gordeev,
	Christian Borntraeger, Sven Schnelle, Yoshinori Sato,
	Rich Felker, John Paul Adrian Glaubitz, David S. Miller,
	Andreas Larsson, Richard Weinberger, Anton Ivanov, Johannes Berg,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H. Peter Anvin, Andy Lutomirski, Peter Zijlstra, Chris Zankel,
	Max Filippov, Andrew Morton, Muchun Song, Dennis Zhou, Tejun Heo,
	Christoph Lameter, Maxwell Bland, Linus Walleij,
	David Hildenbrand, Arnd Bergmann, Ard Biesheuvel, Ryan Roberts,
	Mark Rutland, Nikhil V, Rick Edgecombe, Baolin Wang, Bibo Mao,
	Tianrui Zhao, Randy Dunlap, Vlastimil Babka, Kent Overstreet,
	Peter Xu, Jiangfeng Xiao, Alexandre Ghiti, Jisheng Zhang,
	Conor Dooley, Mason Huo, Sia Jee Heng, Song Shuai,
	Gerald Schaefer, Qi Zheng, Hugh Dickins, Jason Gunthorpe,
	Breno Leitao, Josh Poimboeuf, linux-alpha, linux-kernel,
	linux-snps-arc, kasan-dev, linux-arm-kernel, linux-csky,
	linux-hexagon, loongarch, linux-m68k, linux-mips, kvm,
	linux-openrisc, linux-parisc, linuxppc-dev, linux-riscv,
	linux-s390, linux-sh, sparclinux, Mark Rutland,
	Greg Kroah-Hartman, Christoph Hellwig, Christophe Leroy,
	David Hildenbrand, Conor Dooley, linux-um

This patch affords each architecture the ability to condition the
population of page middle directory entries on the virtual address
being allocated, matching the existing PTE infrastructure and removing
the need to perform a reverse page table walk in cases where the
population context is not readily accessible, e.g. dynamic vmalloc
calls on arm64.

To achieve this goal, it modifies every call and implementation of the
pmd_populate_kernel function across architectures, ensuring uniform
adoption across all kernel deployments.
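
Two adaptation patterns recur in the hunks below; condensed here with
identifiers taken from the actual alpha and m68k/coldfire hunks:

	/* Arches with no use for the VA accept and ignore the new argument: */
	static inline void
	pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte,
			    unsigned long vaddr)
	{
		pmd_set(pmd, pte);	/* vaddr intentionally unused */
	}

	/* Macro-based arches simply drop it when forwarding: */
	#define pmd_populate_kernel(mm, pmd, pte, vaddr) pmd_populate(mm, pmd, pte)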

Signed-off-by: Maxwell Bland <mbland@motorola.com>

---

Hi all,

Thank you for taking the time to review this change. This affects many
subarchitectures so the maintainers list is large. Apologies in advance
if there is a specific maintainer I should have spoken with directly for
deployment across subprojects.

The motivation for such a sweeping change comes from
lore.kernel.org/all/cf5409c3-254a-459b-8969-429db2ec6439@redhat.com

It is my understanding as well that some subarchitectures may have
separate "next" or development branches ahead of the main upstream
Linux. Please let me know if a cherry-pick to such a branch is desired,
and I will do my best to port the change there.

 arch/alpha/include/asm/pgalloc.h             |  5 +++--
 arch/arc/include/asm/pgalloc.h               |  3 ++-
 arch/arc/mm/highmem.c                        |  2 +-
 arch/arm/include/asm/kfence.h                |  2 +-
 arch/arm/include/asm/pgalloc.h               |  3 ++-
 arch/arm/mm/kasan_init.c                     |  2 +-
 arch/arm/mm/mmu.c                            |  2 +-
 arch/arm64/include/asm/pgalloc.h             |  3 ++-
 arch/arm64/mm/trans_pgd.c                    |  2 +-
 arch/csky/include/asm/pgalloc.h              |  2 +-
 arch/hexagon/include/asm/pgalloc.h           |  2 +-
 arch/loongarch/include/asm/pgalloc.h         |  3 ++-
 arch/loongarch/mm/init.c                     |  2 +-
 arch/loongarch/mm/kasan_init.c               |  2 +-
 arch/m68k/include/asm/mcf_pgalloc.h          |  2 +-
 arch/m68k/include/asm/motorola_pgalloc.h     |  3 ++-
 arch/m68k/include/asm/sun3_pgalloc.h         |  3 ++-
 arch/microblaze/include/asm/pgalloc.h        |  2 +-
 arch/mips/include/asm/pgalloc.h              |  2 +-
 arch/mips/kvm/mmu.c                          |  2 +-
 arch/nios2/include/asm/pgalloc.h             |  2 +-
 arch/openrisc/include/asm/pgalloc.h          |  2 +-
 arch/parisc/include/asm/pgalloc.h            |  5 +++--
 arch/parisc/mm/init.c                        |  6 +++---
 arch/powerpc/include/asm/book3s/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/book3s/64/pgalloc.h |  2 +-
 arch/powerpc/include/asm/nohash/32/pgalloc.h |  2 +-
 arch/powerpc/include/asm/nohash/64/pgalloc.h |  2 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c     |  2 +-
 arch/powerpc/mm/kasan/init_32.c              |  4 ++--
 arch/powerpc/mm/kasan/init_book3e_64.c       |  9 ++++++---
 arch/powerpc/mm/kasan/init_book3s_64.c       |  7 +++++--
 arch/powerpc/mm/nohash/book3e_pgtable.c      |  2 +-
 arch/powerpc/mm/pgtable_32.c                 |  4 ++--
 arch/riscv/include/asm/pgalloc.h             |  2 +-
 arch/riscv/kernel/hibernate.c                |  2 +-
 arch/s390/include/asm/pgalloc.h              |  2 +-
 arch/sh/include/asm/pgalloc.h                |  2 +-
 arch/sh/mm/init.c                            |  2 +-
 arch/sparc/include/asm/pgalloc_32.h          |  3 ++-
 arch/sparc/include/asm/pgalloc_64.h          |  4 ++--
 arch/sparc/mm/init_64.c                      |  8 ++++----
 arch/um/include/asm/pgalloc.h                |  4 ++--
 arch/x86/include/asm/pgalloc.h               |  3 ++-
 arch/x86/mm/init_64.c                        | 14 +++++++++++---
 arch/x86/mm/ioremap.c                        |  2 +-
 arch/x86/mm/kasan_init_64.c                  |  2 +-
 arch/xtensa/include/asm/pgalloc.h            |  2 +-
 include/linux/mm.h                           |  4 ++--
 mm/hugetlb_vmemmap.c                         |  4 ++--
 mm/kasan/init.c                              | 14 +++++++++-----
 mm/memory.c                                  |  4 ++--
 mm/percpu.c                                  |  2 +-
 mm/pgalloc-track.h                           |  3 ++-
 mm/sparse-vmemmap.c                          |  2 +-
 55 files changed, 107 insertions(+), 78 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index 68be7adbfe58..1d3d86cad3cc 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -7,7 +7,7 @@
 
 #include <asm-generic/pgalloc.h>
 
-/*      
+/*
  * Allocate and free page tables. The xxx_kernel() versions are
  * used to allocate a kernel page table - this turns on ASN bits
  * if any.
@@ -20,7 +20,8 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte)
 }
 
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte,
+		    unsigned long vaddr)
 {
 	pmd_set(pmd, pte);
 }
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 096b8ef58edb..c0ebfa44b204 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -34,7 +34,8 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte,
+		    unsigned long vaddr)
 {
 	/*
 	 * The cast to long below is OK in 32-bit PAE40 regime with long long pte
diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index c79912a6b196..2d327cf35722 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -57,7 +57,7 @@ static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
 		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 		      __func__, PAGE_SIZE, PAGE_SIZE);
 
-	pmd_populate_kernel(&init_mm, pmd_k, pte_k);
+	pmd_populate_kernel(&init_mm, pmd_k, pte_k, kvaddr);
 	return pte_k;
 }
 
diff --git a/arch/arm/include/asm/kfence.h b/arch/arm/include/asm/kfence.h
index 7980d0f2271f..dd4e4325d354 100644
--- a/arch/arm/include/asm/kfence.h
+++ b/arch/arm/include/asm/kfence.h
@@ -19,7 +19,7 @@ static inline int split_pmd_page(pmd_t *pmd, unsigned long addr)
 
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		set_pte_ext(pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0);
-	pmd_populate_kernel(&init_mm, pmd, pte);
+	pmd_populate_kernel(&init_mm, pmd, pte, addr);
 
 	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
 	return 0;
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index a17f01235c29..0a88346db17e 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -122,7 +122,8 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
  * Ensure that we always set both PMD entries.
  */
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep,
+		    unsigned long vaddr)
 {
 	/*
 	 * The pmd must be loaded with the physical address of the PTE table
diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c
index 111d4f703136..9b3af2dce71e 100644
--- a/arch/arm/mm/kasan_init.c
+++ b/arch/arm/mm/kasan_init.c
@@ -111,7 +111,7 @@ static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
 				      __func__, addr);
 				return;
 			}
-			pmd_populate_kernel(&init_mm, pmdp, p);
+			pmd_populate_kernel(&init_mm, pmdp, p, addr);
 			flush_pmd_entry(pmdp);
 		}
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index c24e29c0b9a4..3cfed8dc4a19 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -384,7 +384,7 @@ void __init early_fixmap_init(void)
 		     != FIXADDR_TOP >> PMD_SHIFT);
 
 	pmd = fixmap_pmd(FIXADDR_TOP);
-	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+	pmd_populate_kernel(&init_mm, pmd, bm_pte, __fix_to_virt(FIXADDR_TOP));
 
 	pte_offset_fixmap = pte_offset_early_fixmap;
 }
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 8ff5f2a2579e..5785272144e8 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -124,7 +124,8 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
  * of the mm address space.
  */
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep,
+		    unsigned long vaddr)
 {
 	VM_BUG_ON(mm && mm != &init_mm);
 	__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index 5139a28130c0..f84244d13099 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -69,7 +69,7 @@ static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp,
 	dst_ptep = trans_alloc(info);
 	if (!dst_ptep)
 		return -ENOMEM;
-	pmd_populate_kernel(NULL, dst_pmdp, dst_ptep);
+	pmd_populate_kernel(NULL, dst_pmdp, dst_ptep, addr);
 	dst_ptep = pte_offset_kernel(dst_pmdp, start);
 
 	src_ptep = pte_offset_kernel(src_pmdp, start);
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index 9c84c9012e53..f2c244c58acf 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -11,7 +11,7 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-					pte_t *pte)
+					pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd(__pa(pte)));
 }
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index 55988625e6fb..2be773a5ffeb 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -62,7 +62,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
  * kernel map of the active thread who's calling pmd_populate_kernel...
  */
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	extern spinlock_t kmap_gen_lock;
 	pmd_t *ppmd;
diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index 4e2d6b7ca2ee..6384391e69bd 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -13,7 +13,8 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
-				       pmd_t *pmd, pte_t *pte)
+				       pmd_t *pmd, pte_t *pte,
+				       unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((unsigned long)pte));
 }
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 4dd53427f657..b8952899b120 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -200,7 +200,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr)
 		pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 		if (!pte)
 			panic("%s: Failed to allocate memory\n", __func__);
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel(&init_mm, pmd, pte, addr);
 	}
 
 	return pte_offset_kernel(pmd, addr);
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
index c608adc99845..51d40ff43aa9 100644
--- a/arch/loongarch/mm/kasan_init.c
+++ b/arch/loongarch/mm/kasan_init.c
@@ -110,7 +110,7 @@ static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
 				__pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
 		if (!early)
 			memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
-		pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
+		pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys), addr);
 	}
 
 	return pte_offset_kernel(pmdp, addr);
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 302c5bf67179..989a1aaa8aa1 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -30,7 +30,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 
 #define pmd_populate(mm, pmd, pte) (pmd_val(*pmd) = (unsigned long)(pte))
 
-#define pmd_populate_kernel pmd_populate
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) pmd_populate(mm, pmd, pte)
 
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pgtable,
 				  unsigned long address)
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index 74a817d9387f..74aec6965981 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -79,7 +79,8 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 }
 
 
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+				       pte_t *pte, unsigned long vaddr)
 {
 	pmd_set(pmd, pte);
 }
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 4a137eecb6fe..550283e8bf4d 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -23,7 +23,8 @@ do {								\
 	tlb_remove_page_ptdesc((tlb), page_ptdesc(pte));	\
 } while (0)
 
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+				       pte_t *pte, unsigned long vaddr)
 {
 	pmd_val(*pmd) = __pa((unsigned long)pte);
 }
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index 6c33b05f730f..b3cc2cd8fc50 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -35,7 +35,7 @@ extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 #define pmd_populate(mm, pmd, pte) \
 			(pmd_val(*(pmd)) = (unsigned long)page_address(pte))
 
-#define pmd_populate_kernel(mm, pmd, pte) \
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) \
 		(pmd_val(*(pmd)) = (unsigned long) (pte))
 
 #endif /* _ASM_MICROBLAZE_PGALLOC_H */
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index f4440edcd8fe..fb71c8776a04 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -19,7 +19,7 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-	pte_t *pte)
+	pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((unsigned long)pte));
 }
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 467ee6b95ae1..47f48929a124 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -133,7 +133,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
 			return NULL;
 		new_pte = kvm_mmu_memory_cache_alloc(cache);
 		clear_page(new_pte);
-		pmd_populate_kernel(NULL, pmd, new_pte);
+		pmd_populate_kernel(NULL, pmd, new_pte, addr);
 	}
 	return pte_offset_kernel(pmd, addr);
 }
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index ce6bb8e74271..ea99d36a6fdd 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -15,7 +15,7 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-	pte_t *pte)
+	pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((unsigned long)pte));
 }
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index c6a73772a546..304cf8955bec 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -25,7 +25,7 @@
 
 extern int mem_init_done;
 
-#define pmd_populate_kernel(mm, pmd, pte) \
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) \
 	set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte)))
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index e3e142b1c5c5..cba92c90a62a 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -61,13 +61,14 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 #endif
 
 static inline void
-pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
+pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((PxD_FLAG_PRESENT | PxD_FLAG_VALID)
 		+ (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)));
 }
 
 #define pmd_populate(mm, pmd, pte_page) \
-	pmd_populate_kernel(mm, pmd, page_address(pte_page))
+	pmd_populate_kernel(mm, pmd, page_address(pte_page), \
+			    (unsigned long)page_to_virt(pte_page))
 
 #endif
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index f876af56e13f..1cf3aae67023 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -390,7 +390,7 @@ static void __ref map_pages(unsigned long start_vaddr,
 				pg_table = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 				if (!pg_table)
 					panic("page table allocation failed\n");
-				pmd_populate_kernel(NULL, pmd, pg_table);
+				pmd_populate_kernel(NULL, pmd, pg_table, vaddr);
 			}
 
 			pg_table = pte_offset_kernel(pmd, vaddr);
@@ -481,7 +481,7 @@ void free_initmem(void)
 	/* finally dump all the instructions which were cached, since the
 	 * pages are no-longer executable */
 	flush_icache_range(init_begin, init_end);
-	
+
 	free_initmem_default(POISON_FREE_INITMEM);
 
 	/* set up a new led state on systems shipped LED State panel */
@@ -694,7 +694,7 @@ static void __init fixmap_init(void)
 		if (!pte)
 			panic("fixmap: pte allocation failed.\n");
 
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel(&init_mm, pmd, pte, addr);
 
 		addr += PAGE_SIZE;
 	} while (addr < end);
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index dc5c039eb28e..b85105158686 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -26,7 +26,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 /* #define pgd_populate(mm, pmd, pte)      BUG() */
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	*pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
 }
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index dd2cff53a111..061c4be60166 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -156,7 +156,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	*pmd = __pmd(__pgtable_ptr_val(pte) | PMD_VAL_BITS);
 }
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 11eac371e7e0..2788ce005b95 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -15,7 +15,7 @@
 /* #define pgd_populate(mm, pmd, pte)      BUG() */
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	if (IS_ENABLED(CONFIG_BOOKE))
 		*pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index e50b211becb3..d069443b4014 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -37,7 +37,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	pmd_set(pmd, (unsigned long)pte);
 }
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 15e88f1439ec..a70063cd6f64 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -104,7 +104,7 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 	if (!pmd_present(*pmdp)) {
 		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
 						region_start, region_end);
-		pmd_populate_kernel(&init_mm, pmdp, ptep);
+		pmd_populate_kernel(&init_mm, pmdp, ptep, ea);
 	}
 	ptep = pte_offset_kernel(pmdp, ea);
 
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
index aa9aa11927b2..22df07fd1af5 100644
--- a/arch/powerpc/mm/kasan/init_32.c
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -47,7 +47,7 @@ int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_
 		if (!new)
 			return -ENOMEM;
 		kasan_populate_pte(new, PAGE_KERNEL);
-		pmd_populate_kernel(&init_mm, pmd, new);
+		pmd_populate_kernel(&init_mm, pmd, new, k_cur);
 	}
 	return 0;
 }
@@ -187,6 +187,6 @@ void __init kasan_early_init(void)
 
 	do {
 		next = pgd_addr_end(addr, end);
-		pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
+		pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte, addr);
 	} while (pmd++, addr = next, addr != end);
 }
diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c
index 11519e88dc6b..05ccdb88ff51 100644
--- a/arch/powerpc/mm/kasan/init_book3e_64.c
+++ b/arch/powerpc/mm/kasan/init_book3e_64.c
@@ -54,7 +54,7 @@ static int __init kasan_map_kernel_page(unsigned long ea, unsigned long pa, pgpr
 	if (kasan_pte_table(*pmdp)) {
 		ptep = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
 		memcpy(ptep, kasan_early_shadow_pte, PTE_TABLE_SIZE);
-		pmd_populate_kernel(&init_mm, pmdp, ptep);
+		pmd_populate_kernel(&init_mm, pmdp, ptep, ea);
 	}
 	ptep = pte_offset_kernel(pmdp, ea);
 
@@ -93,9 +93,12 @@ void __init kasan_early_init(void)
 		__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
 			     &kasan_early_shadow_pte[i], zero_pte, 0);
 
-	for (i = 0; i < PTRS_PER_PMD; i++)
+	unsigned long addr = KASAN_SHADOW_START;
+	for (i = 0; i < PTRS_PER_PMD; i++) {
 		pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
-				    kasan_early_shadow_pte);
+				    kasan_early_shadow_pte, addr);
+		addr += PMD_SIZE;
+	}
 
 	for (i = 0; i < PTRS_PER_PUD; i++)
 		pud_populate(&init_mm, &kasan_early_shadow_pud[i],
diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c
index 9300d641cf9a..79569734dc29 100644
--- a/arch/powerpc/mm/kasan/init_book3s_64.c
+++ b/arch/powerpc/mm/kasan/init_book3s_64.c
@@ -55,6 +55,7 @@ void __init kasan_init(void)
 	phys_addr_t start, end;
 	u64 i;
 	pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL);
+	unsigned long vaddr_start = KASAN_SHADOW_START;
 
 	if (!early_radix_enabled()) {
 		pr_warn("KASAN not enabled as it requires radix!");
@@ -68,9 +69,11 @@ void __init kasan_init(void)
 		__set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
 			     &kasan_early_shadow_pte[i], zero_pte, 0);
 
-	for (i = 0; i < PTRS_PER_PMD; i++)
+	for (i = 0; i < PTRS_PER_PMD; i++) {
 		pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
-				    kasan_early_shadow_pte);
+				    kasan_early_shadow_pte,
+				    vaddr_start + i * PMD_SIZE);
+	}
 
 	for (i = 0; i < PTRS_PER_PUD; i++)
 		pud_populate(&init_mm, &kasan_early_shadow_pud[i],
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index 1c5e4ecbebeb..930bdd7a3774 100644
--- a/arch/powerpc/mm/nohash/book3e_pgtable.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -107,7 +107,7 @@ int __ref map_kernel_page(unsigned long ea, phys_addr_t pa, pgprot_t prot)
 		pmdp = pmd_offset(pudp, ea);
 		if (!pmd_present(*pmdp)) {
 			ptep = early_alloc_pgtable(PTE_TABLE_SIZE);
-			pmd_populate_kernel(&init_mm, pmdp, ptep);
+			pmd_populate_kernel(&init_mm, pmdp, ptep, ea);
 		}
 		ptep = pte_offset_kernel(pmdp, ea);
 	}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index cfd622ebf774..e6fbaf3e9072 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -43,7 +43,7 @@ notrace void __init early_ioremap_init(void)
 
 	for (; (s32)(FIXADDR_TOP - addr) > 0;
 	     addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++)
-		pmd_populate_kernel(&init_mm, pmdp, ptep);
+		pmd_populate_kernel(&init_mm, pmdp, ptep, addr);
 
 	early_ioremap_setup();
 }
@@ -64,7 +64,7 @@ pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
 	if (pmd_none(*pmdp)) {
 		pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
 
-		pmd_populate_kernel(&init_mm, pmdp, ptep);
+		pmd_populate_kernel(&init_mm, pmdp, ptep, va);
 	}
 	return pte_offset_kernel(pmdp, va);
 }
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index deaf971253a2..d619daeded7f 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -16,7 +16,7 @@
 #include <asm-generic/pgalloc.h>
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
-	pmd_t *pmd, pte_t *pte)
+	pmd_t *pmd, pte_t *pte, unsigned long vaddr)
 {
 	unsigned long pfn = virt_to_pfn(pte);
 
diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
index 671b686c0158..085123ad4fa8 100644
--- a/arch/riscv/kernel/hibernate.c
+++ b/arch/riscv/kernel/hibernate.c
@@ -176,7 +176,7 @@ static int temp_pgtable_map_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long
 		if (!dst_ptep)
 			return -ENOMEM;
 
-		pmd_populate_kernel(NULL, dst_pmdp, dst_ptep);
+		pmd_populate_kernel(NULL, dst_pmdp, dst_ptep, start);
 	}
 
 	dst_ptep = pte_offset_kernel(dst_pmdp, start);
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 7b84ef6dc4b6..4143b3f9d610 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -131,7 +131,7 @@ static inline void pmd_populate(struct mm_struct *mm,
 	set_pmd(pmd, __pmd(_SEGMENT_ENTRY | __pa(pte)));
 }
 
-#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) pmd_populate(mm, pmd, pte)
 
 /*
  * page table entry allocation/free routines.
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index 5d8577ab1591..04b29eb9712b 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -21,7 +21,7 @@ extern void pmd_free(struct mm_struct *mm, pmd_t *pmd);
 #endif
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
-				       pte_t *pte)
+				       pte_t *pte, unsigned long vaddr)
 {
 	set_pmd(pmd, __pmd((unsigned long)pte));
 }
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index bf1b54055316..c862572dbec8 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -157,7 +157,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 		if (!pte)
 			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
 			      __func__, PAGE_SIZE, PAGE_SIZE);
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel(&init_mm, pmd, pte, 0);
 		BUG_ON(pte != pte_offset_kernel(pmd, 0));
 	}
 
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 4f73e87b22a3..558afcbd9016 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -53,7 +53,8 @@ static inline void free_pmd_fast(pmd_t * pmd)
 #define pmd_populate(mm, pmd, pte)	pmd_set(pmd, pte)
 
 void pmd_set(pmd_t *pmdp, pte_t *ptep);
-#define pmd_populate_kernel		pmd_populate
+#define pmd_populate_kernel(mm, pmd, pte, vaddr)	\
+	pmd_populate(mm, pmd, pte)
 
 pgtable_t pte_alloc_one(struct mm_struct *mm);
 
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index caa7632be4c2..185ad9637442 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -69,8 +69,8 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 #define pte_free_defer pte_free_defer
 void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
 
-#define pmd_populate_kernel(MM, PMD, PTE)	pmd_set(MM, PMD, PTE)
-#define pmd_populate(MM, PMD, PTE)		pmd_set(MM, PMD, PTE)
+#define pmd_populate_kernel(MM, PMD, PTE, VADDR)	pmd_set(MM, PMD, PTE)
+#define pmd_populate(MM, PMD, PTE)			pmd_set(MM, PMD, PTE)
 
 void pgtable_free(void *table, bool is_page);
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 1ca9054d9b97..32b3c89f869d 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -5,7 +5,7 @@
  *  Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
  *  Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
- 
+
 #include <linux/extable.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
@@ -1843,7 +1843,7 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 			if (!new)
 				goto err_alloc;
 			alloc_bytes += PAGE_SIZE;
-			pmd_populate_kernel(&init_mm, pmd, new);
+			pmd_populate_kernel(&init_mm, pmd, new, vstart);
 		}
 
 		pte = pte_offset_kernel(pmd, vstart);
@@ -2404,11 +2404,11 @@ void __init paging_init(void)
 	 * work.
 	 */
 	init_mm.pgd += ((shift) / (sizeof(pgd_t)));
-	
+
 	memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
 
 	inherit_prom_mappings();
-	
+
 	/* Ok, we can use our TLB miss and window trap handlers safely.  */
 	setup_tba();
 
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index de5e31c64793..300431ff61bb 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* 
+/*
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Copyright 2003 PathScale, Inc.
  * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h
@@ -12,7 +12,7 @@
 
 #include <asm-generic/pgalloc.h>
 
-#define pmd_populate_kernel(mm, pmd, pte) \
+#define pmd_populate_kernel(mm, pmd, pte, vaddr) \
 	set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) __pa(pte)))
 
 #define pmd_populate(mm, pmd, pte) 				\
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index dcd836b59beb..3bc5e0cc7b38 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -62,7 +62,8 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
 }
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
-				       pmd_t *pmd, pte_t *pte)
+				       pmd_t *pmd, pte_t *pte,
+				       unsigned long vaddr)
 {
 	paravirt_alloc_pte(mm, __pa(pte) >> PAGE_SHIFT);
 	set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7e177856ee4f..ee4a73842466 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -73,7 +73,15 @@ static inline void fname##_init(struct mm_struct *mm,		\
 DEFINE_POPULATE(p4d_populate, p4d, pud, init)
 DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
 DEFINE_POPULATE(pud_populate, pud, pmd, init)
-DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
+
+static inline void pmd_populate_kernel_init(struct mm_struct *mm,
+		pmd_t *arg1, pte_t *arg2, unsigned long arg3, bool init)
+{
+	if (init)
+		pmd_populate_kernel_safe(mm, arg1, arg2);
+	else
+		pmd_populate_kernel(mm, arg1, arg2, arg3);
+}
 
 #define DEFINE_ENTRY(type1, type2, init)			\
 static inline void set_##type1##_init(type1##_t *arg1,		\
@@ -286,7 +294,7 @@ static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
 {
 	if (pmd_none(*pmd)) {
 		pte_t *pte = (pte_t *) spp_getpage();
-		pmd_populate_kernel(&init_mm, pmd, pte);
+		pmd_populate_kernel(&init_mm, pmd, pte, vaddr);
 		if (pte != pte_offset_kernel(pmd, 0))
 			printk(KERN_ERR "PAGETABLE BUG #03!\n");
 	}
@@ -575,7 +583,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
 		paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		pmd_populate_kernel_init(&init_mm, pmd, pte, init);
+		pmd_populate_kernel_init(&init_mm, pmd, pte, (unsigned long)__va(paddr), init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 	update_page_count(PG_LEVEL_2M, pages);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index aa7d279321ea..8844047fdaad 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -888,7 +888,7 @@ void __init early_ioremap_init(void)
 
 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	memset(bm_pte, 0, sizeof(bm_pte));
-	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+	pmd_populate_kernel(&init_mm, pmd, bm_pte, fix_to_virt(FIX_BTMAP_BEGIN));
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 9dddf19a5571..95ae9e12fe41 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -53,7 +53,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
 		}
 
 		p = early_alloc(PAGE_SIZE, nid, true);
-		pmd_populate_kernel(&init_mm, pmd, p);
+		pmd_populate_kernel(&init_mm, pmd, p, addr);
 	}
 
 	pte = pte_offset_kernel(pmd, addr);
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index 7fc0f9126dd3..5359e4091b9a 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -21,7 +21,7 @@
  * inside the pgd, so has no extra memory associated with it.
  */
 
-#define pmd_populate_kernel(mm, pmdp, ptep)				     \
+#define pmd_populate_kernel(mm, pmdp, ptep, vaddr)			     \
 	(pmd_val(*(pmdp)) = ((unsigned long)ptep))
 #define pmd_populate(mm, pmdp, page)					     \
 	(pmd_val(*(pmdp)) = ((unsigned long)page_to_virt(page)))
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b0ee64225de..7162667c0e37 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2802,7 +2802,7 @@ static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
 #endif
 
 int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
-int __pte_alloc_kernel(pmd_t *pmd);
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long vaddr);
 
 #if defined(CONFIG_MMU)
 
@@ -2997,7 +2997,7 @@ pte_t *pte_offset_map_nolock(struct mm_struct *mm, pmd_t *pmd,
 		 NULL : pte_offset_map_lock(mm, pmd, address, ptlp))
 
 #define pte_alloc_kernel(pmd, address)			\
-	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
+	((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
 		NULL: pte_offset_kernel(pmd, address))
 
 #if USE_SPLIT_PMD_PTLOCKS
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index da177e49d956..cfbe3695fffb 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -58,7 +58,7 @@ static int vmemmap_split_pmd(pmd_t *pmd, struct page *head, unsigned long start,
 	if (!pgtable)
 		return -ENOMEM;
 
-	pmd_populate_kernel(&init_mm, &__pmd, pgtable);
+	pmd_populate_kernel(&init_mm, &__pmd, pgtable, start);
 
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
 		pte_t entry, *pte;
@@ -81,7 +81,7 @@ static int vmemmap_split_pmd(pmd_t *pmd, struct page *head, unsigned long start,
 
 		/* Make pte visible before pmd. See comment in pmd_install(). */
 		smp_wmb();
-		pmd_populate_kernel(&init_mm, pmd, pgtable);
+		pmd_populate_kernel(&init_mm, pmd, pgtable, start);
 		if (!(walk->flags & VMEMMAP_SPLIT_NO_TLB_FLUSH))
 			flush_tlb_kernel_range(start, start + PMD_SIZE);
 	} else {
diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index 89895f38f722..813f8e8a801c 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -117,7 +117,8 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 
 		if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
 			pmd_populate_kernel(&init_mm, pmd,
-					lm_alias(kasan_early_shadow_pte));
+					lm_alias(kasan_early_shadow_pte),
+					addr);
 			continue;
 		}
 
@@ -131,7 +132,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
 			if (!p)
 				return -ENOMEM;
 
-			pmd_populate_kernel(&init_mm, pmd, p);
+			pmd_populate_kernel(&init_mm, pmd, p, addr);
 		}
 		zero_pte_populate(pmd, addr, next);
 	} while (pmd++, addr = next, addr != end);
@@ -158,7 +159,8 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
 					lm_alias(kasan_early_shadow_pmd));
 			pmd = pmd_offset(pud, addr);
 			pmd_populate_kernel(&init_mm, pmd,
-					lm_alias(kasan_early_shadow_pte));
+					lm_alias(kasan_early_shadow_pte),
+					addr);
 			continue;
 		}
 
@@ -204,7 +206,8 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
 					lm_alias(kasan_early_shadow_pmd));
 			pmd = pmd_offset(pud, addr);
 			pmd_populate_kernel(&init_mm, pmd,
-					lm_alias(kasan_early_shadow_pte));
+					lm_alias(kasan_early_shadow_pte),
+					addr);
 			continue;
 		}
 
@@ -267,7 +270,8 @@ int __ref kasan_populate_early_shadow(const void *shadow_start,
 					lm_alias(kasan_early_shadow_pmd));
 			pmd = pmd_offset(pud, addr);
 			pmd_populate_kernel(&init_mm, pmd,
-					lm_alias(kasan_early_shadow_pte));
+					lm_alias(kasan_early_shadow_pte),
+					addr);
 			continue;
 		}
 
diff --git a/mm/memory.c b/mm/memory.c
index d2155ced45f8..67807ade9a0e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -447,7 +447,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd)
 	return 0;
 }
 
-int __pte_alloc_kernel(pmd_t *pmd)
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long vaddr)
 {
 	pte_t *new = pte_alloc_one_kernel(&init_mm);
 	if (!new)
@@ -456,7 +456,7 @@ int __pte_alloc_kernel(pmd_t *pmd)
 	spin_lock(&init_mm.page_table_lock);
 	if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
 		smp_wmb(); /* See comment in pmd_install() */
-		pmd_populate_kernel(&init_mm, pmd, new);
+		pmd_populate_kernel(&init_mm, pmd, new, vaddr);
 		new = NULL;
 	}
 	spin_unlock(&init_mm.page_table_lock);
diff --git a/mm/percpu.c b/mm/percpu.c
index 4e11fc1e6def..fc83cf64baf6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3238,7 +3238,7 @@ void __init __weak pcpu_populate_pte(unsigned long addr)
 		new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
 		if (!new)
 			goto err_alloc;
-		pmd_populate_kernel(&init_mm, pmd, new);
+		pmd_populate_kernel(&init_mm, pmd, new, addr);
 	}
 
 	return;
diff --git a/mm/pgalloc-track.h b/mm/pgalloc-track.h
index e9e879de8649..ac983705a054 100644
--- a/mm/pgalloc-track.h
+++ b/mm/pgalloc-track.h
@@ -45,7 +45,8 @@ static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
 
 #define pte_alloc_kernel_track(pmd, address, mask)			\
 	((unlikely(pmd_none(*(pmd))) &&					\
-	  (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
+	  (__pte_alloc_kernel(pmd, address) ||				\
+	   ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
 		NULL: pte_offset_kernel(pmd, address))
 
 #endif /* _LINUX_PGALLOC_TRACK_H */
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index a2cbe44c48e1..6085c8339b65 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -191,7 +191,7 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
 		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
 		if (!p)
 			return NULL;
-		pmd_populate_kernel(&init_mm, pmd, p);
+		pmd_populate_kernel(&init_mm, pmd, p, addr);
 	}
 	return pmd;
 }
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/5 RESEND] arm64: dynamic enforcement of PXNTable
  2024-04-15 20:16 [PATCH 0/5] mm: code and data partitioning improvements Maxwell Bland
                   ` (4 preceding siblings ...)
  2024-04-16 19:18 ` [PATCH 3/5 RESEND] mm: add vaddr param to pmd_populate_kernel Maxwell Bland
@ 2024-04-16 19:18 ` Maxwell Bland
  2024-04-12 15:00   ` [PATCH 4/5] " Maxwell Bland
  2024-04-17  6:37   ` [PATCH 4/5 RESEND] " kernel test robot
  5 siblings, 2 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-16 19:18 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Maxwell Bland, Catalin Marinas, Will Deacon, Ard Biesheuvel,
	Maxwell Bland, Mark Rutland, Greg Kroah-Hartman,
	Christoph Hellwig, Christophe Leroy, David Hildenbrand,
	Conor Dooley, linux-kernel

PXNTable is enforced during init to ensure that regions of user memory
and kernel data cannot be executed from, preventing attacks that write
to writable kernel pages and then modify the kernel's page tables to
make that code executable. This patch preserves that protection for
dynamically allocated pages and page tables by making every PMD
populated outside of the module code region PXNTable by default.
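
For example (an illustration using this patch's own helpers, with pmdp
and ptep standing for a PMD entry and a PTE table being installed for a
vmalloc data mapping):

	/* vaddr lies outside the module region and kernel text, so
	 * vaddr_is_data() holds and the installed table descriptor is
	 * PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_PXN: nothing mapped
	 * beneath it can be made kernel-executable. */
	pmd_populate_kernel(&init_mm, pmdp, ptep, vaddr);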

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 arch/arm64/include/asm/pgalloc.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 5785272144e8..2376b4e7915c 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -12,6 +12,7 @@
 #include <asm/processor.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/module.h>
 
 #define __HAVE_ARCH_PGD_FREE
 #define __HAVE_ARCH_PUD_FREE
@@ -119,6 +120,12 @@ static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
 	set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
 }
 
+static inline bool vaddr_is_data(unsigned long vaddr)
+{
+	return ((vaddr + PMD_SIZE < MODULES_ASLR_START || vaddr >= MODULES_ASLR_END) &&
+		(vaddr + PMD_SIZE < (unsigned long) _text || vaddr >= (unsigned long) _etext));
+}
+
 /*
  * Populate the pmdp entry with a pointer to the pte.  This pmd is part
  * of the mm address space.
@@ -127,8 +134,11 @@ static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *ptep,
 		    unsigned long vaddr)
 {
+	pmdval_t pmd = PMD_TYPE_TABLE | PMD_TABLE_UXN;
 	VM_BUG_ON(mm && mm != &init_mm);
-	__pmd_populate(pmdp, __pa(ptep), PMD_TYPE_TABLE | PMD_TABLE_UXN);
+	if (vaddr_is_data(vaddr))
+		pmd |= PMD_TABLE_PXN;
+	__pmd_populate(pmdp, __pa(ptep), pmd);
 }
 
 static inline void
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/5 RESEND] ptdump: add state parameter for non-leaf callback
  2024-04-15 19:51 ` [PATCH 5/5] ptdump: add state parameter for non-leaf callback Maxwell Bland
@ 2024-04-16 19:18   ` Maxwell Bland
  2024-04-16 20:11   ` [PATCH 5/5] " Andrew Morton
  1 sibling, 0 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-16 19:18 UTC (permalink / raw)
  To: linux-mm
  Cc: Maxwell Bland, Catalin Marinas, Will Deacon, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Aneesh Kumar K.V,
	Naveen N. Rao, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Alexander Gordeev, Gerald Schaefer, Heiko Carstens,
	Vasily Gorbik, Christian Borntraeger, Sven Schnelle, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, x86, H. Peter Anvin, Andrew Morton,
	Ard Biesheuvel, Mark Rutland, Maxwell Bland, Alexandre Ghiti,
	Yu Chien Peter Lin, Song Shuai, linux-arm-kernel, linux-kernel,
	linuxppc-dev, linux-riscv, Mark Rutland, Greg Kroah-Hartman,
	Christoph Hellwig, Christophe Leroy, David Hildenbrand,
	Conor Dooley, linux-s390

ptdump can now note non-leaf descriptor entries, a useful addition for
debugging table descriptor permissions when working on related code.
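
A walker opts in explicitly; for instance (illustrative only, mirroring
the initializers touched below):

	struct ptdump_state st = {
		.note_page     = note_page,
		.range         = ptdump_range,
		.note_non_leaf = true,	/* also report table descriptors */
	};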

Signed-off-by: Maxwell Bland <mbland@motorola.com>
---
 arch/arm64/mm/ptdump.c          |  6 ++++--
 arch/powerpc/mm/ptdump/ptdump.c |  2 ++
 arch/riscv/mm/ptdump.c          |  6 ++++--
 arch/s390/mm/dump_pagetables.c  |  6 ++++--
 arch/x86/mm/dump_pagetables.c   |  3 ++-
 include/linux/ptdump.h          |  1 +
 mm/ptdump.c                     | 13 +++++++++++++
 7 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index 796231a4fd63..1a6f4a3513e5 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -299,7 +299,8 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
 			.range = (struct ptdump_range[]){
 				{info->base_addr, end},
 				{0, 0}
-			}
+			},
+			.note_non_leaf = false
 		}
 	};
 
@@ -335,7 +336,8 @@ bool ptdump_check_wx(void)
 			.range = (struct ptdump_range[]) {
 				{_PAGE_OFFSET(vabits_actual), ~0UL},
 				{0, 0}
-			}
+			},
+			.note_non_leaf = false
 		}
 	};
 
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 9dc239967b77..89e673f5fd3d 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -307,6 +307,7 @@ static int ptdump_show(struct seq_file *m, void *v)
 		.ptdump = {
 			.note_page = note_page,
 			.range = ptdump_range,
+			.note_non_leaf = false
 		}
 	};
 
@@ -340,6 +341,7 @@ bool ptdump_check_wx(void)
 		.ptdump = {
 			.note_page = note_page,
 			.range = ptdump_range,
+			.note_non_leaf = false
 		}
 	};
 
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 1289cc6d3700..b355633afcaf 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -328,7 +328,8 @@ static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
 			.range = (struct ptdump_range[]) {
 				{pinfo->base_addr, pinfo->end},
 				{0, 0}
-			}
+			},
+			.note_non_leaf = false
 		}
 	};
 
@@ -350,7 +351,8 @@ bool ptdump_check_wx(void)
 			.range = (struct ptdump_range[]) {
 				{KERN_VIRT_START, ULONG_MAX},
 				{0, 0}
-			}
+			},
+			.note_non_leaf = false
 		}
 	};
 
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index ffd07ed7b4af..6468cfd53e2a 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -200,7 +200,8 @@ bool ptdump_check_wx(void)
 			.range = (struct ptdump_range[]) {
 				{.start = 0, .end = max_addr},
 				{.start = 0, .end = 0},
-			}
+			},
+			.note_non_leaf = false
 		},
 		.seq = NULL,
 		.level = -1,
@@ -239,7 +240,8 @@ static int ptdump_show(struct seq_file *m, void *v)
 			.range = (struct ptdump_range[]) {
 				{.start = 0, .end = max_addr},
 				{.start = 0, .end = 0},
-			}
+			},
+			.note_non_leaf = false
 		},
 		.seq = m,
 		.level = -1,
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 89079ea73e65..43f00dfb955f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -380,7 +380,8 @@ bool ptdump_walk_pgd_level_core(struct seq_file *m,
 		.ptdump = {
 			.note_page	= note_page,
 			.effective_prot = effective_prot,
-			.range		= ptdump_ranges
+			.range		= ptdump_ranges,
+			.note_non_leaf  = false
 		},
 		.level = -1,
 		.to_dmesg	= dmesg,
diff --git a/include/linux/ptdump.h b/include/linux/ptdump.h
index 8dbd51ea8626..b3e793a5c77f 100644
--- a/include/linux/ptdump.h
+++ b/include/linux/ptdump.h
@@ -16,6 +16,7 @@ struct ptdump_state {
 			  int level, u64 val);
 	void (*effective_prot)(struct ptdump_state *st, int level, u64 val);
 	const struct ptdump_range *range;
+	bool note_non_leaf;
 };
 
 bool ptdump_walk_pgd_level_core(struct seq_file *m,
diff --git a/mm/ptdump.c b/mm/ptdump.c
index 106e1d66e9f9..97da7a765b22 100644
--- a/mm/ptdump.c
+++ b/mm/ptdump.c
@@ -41,6 +41,9 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
 	if (st->effective_prot)
 		st->effective_prot(st, 0, pgd_val(val));
 
+	if (st->note_non_leaf && !pgd_leaf(val))
+		st->note_page(st, addr, 0, pgd_val(val));
+
 	if (pgd_leaf(val)) {
 		st->note_page(st, addr, 0, pgd_val(val));
 		walk->action = ACTION_CONTINUE;
@@ -64,6 +67,9 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
 	if (st->effective_prot)
 		st->effective_prot(st, 1, p4d_val(val));
 
+	if (st->note_non_leaf && !p4d_leaf(val))
+		st->note_page(st, addr, 1, p4d_val(val));
+
 	if (p4d_leaf(val)) {
 		st->note_page(st, addr, 1, p4d_val(val));
 		walk->action = ACTION_CONTINUE;
@@ -87,6 +93,9 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
 	if (st->effective_prot)
 		st->effective_prot(st, 2, pud_val(val));
 
+	if (st->note_non_leaf && !pud_leaf(val))
+		st->note_page(st, addr, 2, pud_val(val));
+
 	if (pud_leaf(val)) {
 		st->note_page(st, addr, 2, pud_val(val));
 		walk->action = ACTION_CONTINUE;
@@ -108,6 +117,10 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 	if (st->effective_prot)
 		st->effective_prot(st, 3, pmd_val(val));
+
+	if (st->note_non_leaf && !pmd_leaf(val))
+		st->note_page(st, addr, 3, pmd_val(val));
+
 	if (pmd_leaf(val)) {
 		st->note_page(st, addr, 3, pmd_val(val));
 		walk->action = ACTION_CONTINUE;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] ptdump: add state parameter for non-leaf callback
  2024-04-15 19:51 ` [PATCH 5/5] ptdump: add state parameter for non-leaf callback Maxwell Bland
  2024-04-16 19:18   ` [PATCH 5/5 RESEND] " Maxwell Bland
@ 2024-04-16 20:11   ` Andrew Morton
  2024-04-16 21:01     ` Maxwell Bland
  1 sibling, 1 reply; 18+ messages in thread
From: Andrew Morton @ 2024-04-16 20:11 UTC (permalink / raw)
  To: Maxwell Bland
  Cc: linux-mm, Catalin Marinas, Will Deacon, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Aneesh Kumar K.V,
	Naveen N. Rao, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Alexander Gordeev, Gerald Schaefer, Heiko Carstens,
	Vasily Gorbik, Christian Borntraeger, Sven Schnelle, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, x86, H. Peter Anvin, Ard Biesheuvel,
	Mark Rutland, Alexandre Ghiti, Yu Chien Peter Lin, Song Shuai,
	linux-arm-kernel, linux-kernel, linuxppc-dev, linux-riscv,
	linux-s390

On Mon, 15 Apr 2024 14:51:32 -0500 Maxwell Bland <mbland@motorola.com> wrote:

> ptdump can now note non-leaf descriptor entries, a useful addition for
> debugging table descriptor permissions when working on related code.
> 
> Signed-off-by: Maxwell Bland <mbland@motorola.com>
> ---
>  arch/arm64/mm/ptdump.c          |  6 ++++--
>  arch/powerpc/mm/ptdump/ptdump.c |  2 ++
>  arch/riscv/mm/ptdump.c          |  6 ++++--
>  arch/s390/mm/dump_pagetables.c  |  6 ++++--
>  arch/x86/mm/dump_pagetables.c   |  3 ++-
>  include/linux/ptdump.h          |  1 +
>  mm/ptdump.c                     | 13 +++++++++++++
>  7 files changed, 30 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
> index 796231a4fd63..1a6f4a3513e5 100644
> --- a/arch/arm64/mm/ptdump.c
> +++ b/arch/arm64/mm/ptdump.c
> @@ -299,7 +299,8 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
>  			.range = (struct ptdump_range[]){
>  				{info->base_addr, end},
>  				{0, 0}
> -			}
> +			},
> +			.note_non_leaf = false
>  		}

It would be acceptable to omit all of these and rely upon the runtime
zeroing which the compiler will emit.

Documentation/arch/arm64/ptdump.rst might need updating.

Please include sample output in the changelog so we can better
understand the user's view of this change.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] ptdump: add state parameter for non-leaf callback
  2024-04-16 20:11   ` [PATCH 5/5] " Andrew Morton
@ 2024-04-16 21:01     ` Maxwell Bland
  0 siblings, 0 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-16 21:01 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, Catalin Marinas, Will Deacon, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Aneesh Kumar K.V,
	Naveen N. Rao, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Alexander Gordeev, Gerald Schaefer, Heiko Carstens,
	Vasily Gorbik, Christian Borntraeger, Sven Schnelle, Dave Hansen,
	Andy Lutomirski, Peter Zijlstra, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, x86, H. Peter Anvin, Ard Biesheuvel,
	Mark Rutland, Alexandre Ghiti, Yu Chien Peter Lin, Song Shuai,
	linux-arm-kernel, linux-kernel, linuxppc-dev, linux-riscv,
	linux-s390

> On Tuesday, April 16, 2024 3:11 PM, Andrew Morton wrote:
> On Mon, 15 Apr 2024 14:51:32 -0500 Maxwell Bland <mbland@motorola.com>
> wrote:
>
> >  arch/arm64/mm/ptdump.c          |  6 ++++--
> >  arch/powerpc/mm/ptdump/ptdump.c |  2 ++
> >  arch/riscv/mm/ptdump.c          |  6 ++++--
> >  arch/s390/mm/dump_pagetables.c  |  6 ++++--
> >  arch/x86/mm/dump_pagetables.c   |  3 ++-
> >  include/linux/ptdump.h          |  1 +
> >  mm/ptdump.c                     | 13 +++++++++++++
> >  7 files changed, 30 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
> > index 796231a4fd63..1a6f4a3513e5 100644
> > --- a/arch/arm64/mm/ptdump.c
> > +++ b/arch/arm64/mm/ptdump.c
> > @@ -299,7 +299,8 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info
> *info)
> >                     .range = (struct ptdump_range[]){
> >                             {info->base_addr, end},
> >                             {0, 0}
> > -                   }
> > +                   },
> > +                   .note_non_leaf = false
> >             }
>
> It would be acceptable to omit all of these and rely upon the runtime
> zeroing which the compiler will emit.

Ah, thank you for the pointer to C99 6.7.8.21. I had always figured that since
structs are stack-allocated they are potentially uninitialized!
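
A toy example of that rule, for the archive (userspace, nothing
kernel-specific):

	#include <stdbool.h>
	#include <stdio.h>

	struct s { int a; bool flag; };

	int main(void)
	{
		struct s st = { .a = 1 };
		/* .flag is not named above, so per C99 6.7.8p21 it is
		 * implicitly zero-initialized; this prints 0. */
		printf("%d\n", st.flag);
		return 0;
	}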

> Documentation/arch/arm64/ptdump.rst might need updating.
>
> Please include sample output in the changelog so we can better
> understand the user's view of this change.

Thanks, I will do both in the next few days, everything permitting! Right now
this patch results in no change until note_non_leaf = true is adopted for each
arch.

My plan: I will polish and then include the output of my personal fixes for
arm64, specifically printing expanded PMD flags and tab-indenting the layout
according to each level.

Hopefully just adding arm64 support for now is OK, unless maybe we want to
default this to true on all arches? IMO default true would be sweet, but I
wasn't sure everyone would agree.

BRs,
Maxwell Bland

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/5] arm64: mm: code and data partitioning for aslr
  2024-04-03 21:08 ` [PATCH 2/5] arm64: mm: code and data partitioning for aslr Maxwell Bland
  2024-04-16 19:18   ` [PATCH 2/5 RESEND] " Maxwell Bland
@ 2024-04-17  5:14   ` kernel test robot
  1 sibling, 0 replies; 18+ messages in thread
From: kernel test robot @ 2024-04-17  5:14 UTC (permalink / raw)
  To: Maxwell Bland, linux-arm-kernel
  Cc: oe-kbuild-all, Maxwell Bland, linux-kernel, Catalin Marinas,
	Will Deacon, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Zi Shen Lim, Mark Rutland, Ard Biesheuvel,
	Kees Cook, Sami Tolvanen, Baoquan He, Jonathan Cameron,
	Greg Kroah-Hartman, Ryo Takakura, James Morse, Christophe Leroy,
	bpf

Hi Maxwell,

kernel test robot noticed the following build errors:

[auto build test ERROR on 0bbac3facb5d6cc0171c45c9873a2dc96bea9680]

url:    https://github.com/intel-lab-lkp/linux/commits/Maxwell-Bland/mm-allow-arch-refinement-skip-for-vmap-alloc/20240417-032149
base:   0bbac3facb5d6cc0171c45c9873a2dc96bea9680
patch link:    https://lore.kernel.org/r/20240416122254.868007168-3-mbland%40motorola.com
patch subject: [PATCH 2/5] arm64: mm: code and data partitioning for aslr
config: arm64-allnoconfig (https://download.01.org/0day-ci/archive/20240417/202404171355.jlsKaUGf-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240417/202404171355.jlsKaUGf-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202404171355.jlsKaUGf-lkp@intel.com/

All errors (new ones prefixed by >>):

   aarch64-linux-ld: Unexpected GOT/PLT entries detected!
   aarch64-linux-ld: Unexpected run-time procedure linkages detected!
   aarch64-linux-ld: arch/arm64/kernel/setup.o: in function `setup_arch':
>> setup.c:(.init.text+0x694): undefined reference to `module_init_limits'

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5 RESEND] arm64: dynamic enforcement of PXNTable
  2024-04-16 19:18 ` [PATCH 4/5 RESEND] arm64: dynamic enforcement of PXNTable Maxwell Bland
  2024-04-12 15:00   ` [PATCH 4/5] " Maxwell Bland
@ 2024-04-17  6:37   ` kernel test robot
  1 sibling, 0 replies; 18+ messages in thread
From: kernel test robot @ 2024-04-17  6:37 UTC (permalink / raw)
  To: Maxwell Bland, linux-arm-kernel
  Cc: oe-kbuild-all, Maxwell Bland, Catalin Marinas, Will Deacon,
	Ard Biesheuvel, Mark Rutland, Greg Kroah-Hartman,
	Christoph Hellwig, Christophe Leroy, David Hildenbrand,
	Conor Dooley, linux-kernel

Hi Maxwell,

kernel test robot noticed the following build errors:

[auto build test ERROR on 0bbac3facb5d6cc0171c45c9873a2dc96bea9680]

url:    https://github.com/intel-lab-lkp/linux/commits/Maxwell-Bland/mm-allow-arch-refinement-skip-for-vmap-alloc/20240417-032149
base:   0bbac3facb5d6cc0171c45c9873a2dc96bea9680
patch link:    https://lore.kernel.org/r/20240416122254.868007168-5-mbland%40motorola.com
patch subject: [PATCH 4/5 RESEND] arm64: dynamic enforcement of PXNTable
config: arm64-allnoconfig (https://download.01.org/0day-ci/archive/20240417/202404171444.fqXW3YmG-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240417/202404171444.fqXW3YmG-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202404171444.fqXW3YmG-lkp@intel.com/

All errors (new ones prefixed by >>):

   aarch64-linux-ld: Unexpected GOT/PLT entries detected!
   aarch64-linux-ld: Unexpected run-time procedure linkages detected!
   aarch64-linux-ld: arch/arm64/kernel/setup.o: in function `setup_arch':
   setup.c:(.init.text+0x694): undefined reference to `module_init_limits'
   aarch64-linux-ld: mm/memory.o: in function `__pte_alloc_kernel':
>> memory.c:(.text+0x2b64): undefined reference to `module_plt_base'
   aarch64-linux-ld: mm/memory.o: relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `module_plt_base' which may bind externally can not be used when making a shared object; recompile with -fPIC
   memory.c:(.text+0x2b64): dangerous relocation: unsupported relocation
>> aarch64-linux-ld: memory.c:(.text+0x2b6c): undefined reference to `module_plt_base'
>> aarch64-linux-ld: memory.c:(.text+0x2b74): undefined reference to `module_direct_base'
   aarch64-linux-ld: mm/memory.o: relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `module_direct_base' which may bind externally can not be used when making a shared object; recompile with -fPIC
   memory.c:(.text+0x2b74): dangerous relocation: unsupported relocation
   aarch64-linux-ld: memory.c:(.text+0x2b78): undefined reference to `module_direct_base'
   aarch64-linux-ld: mm/sparse-vmemmap.o: in function `vmemmap_pmd_populate':
>> sparse-vmemmap.c:(.meminit.text+0x450): undefined reference to `module_plt_base'
   aarch64-linux-ld: mm/sparse-vmemmap.o: relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `module_plt_base' which may bind externally can not be used when making a shared object; recompile with -fPIC
   sparse-vmemmap.c:(.meminit.text+0x450): dangerous relocation: unsupported relocation
>> aarch64-linux-ld: sparse-vmemmap.c:(.meminit.text+0x458): undefined reference to `module_plt_base'
>> aarch64-linux-ld: sparse-vmemmap.c:(.meminit.text+0x460): undefined reference to `module_direct_base'
   aarch64-linux-ld: mm/sparse-vmemmap.o: relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `module_direct_base' which may bind externally can not be used when making a shared object; recompile with -fPIC
   sparse-vmemmap.c:(.meminit.text+0x460): dangerous relocation: unsupported relocation
   aarch64-linux-ld: sparse-vmemmap.c:(.meminit.text+0x464): undefined reference to `module_direct_base'

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc
  2024-04-02 20:15 ` [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc Maxwell Bland
  2024-04-16 19:18   ` [PATCH 1/5 RESEND] " Maxwell Bland
@ 2024-04-18  8:55   ` Uladzislau Rezki
  2024-04-18 15:52     ` Maxwell Bland
  1 sibling, 1 reply; 18+ messages in thread
From: Uladzislau Rezki @ 2024-04-18  8:55 UTC (permalink / raw)
  To: Maxwell Bland
  Cc: linux-mm, linux-kernel, Andrew Morton, Uladzislau Rezki,
	Christoph Hellwig, Lorenzo Stoakes

On Tue, Apr 02, 2024 at 03:15:01PM -0500, Maxwell Bland wrote:
> Makes red black tree allocation more flexible on a per-architecture
> basis by introducing an optional hooks to refine the red-black tree
> structuring and exposing vmalloc functions for clipping vmap areas,
> finding vmap areas, and inserting vmap areas.
> 
> With this patch, the red-black vmap tree can be refined to account for
> architecture-specific memory management operations, most notably address
> space layout randomization, as these features conflict with generic
> management of a single vmalloc_start to vmalloc_end range as given by
> mm/vmalloc.c.
> 
> For example, x86 is forced to restrict aslr to 1024 possible locations,
> which is a very, very small number, and arm64 breaks standard code/data
> partitioning altogether, which prevents the enforcement of performant
> immmutability on kernel page tables.
> 
> Signed-off-by: Maxwell Bland <mbland@motorola.com>
> ---
>  include/linux/vmalloc.h | 24 ++++++++++++++++++++++++
>  mm/vmalloc.c            | 16 ++++++++++------
>  2 files changed, 34 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 98ea90e90439..3c5ce7ee0bea 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -12,6 +12,7 @@
>  
>  #include <asm/vmalloc.h>
>  
> +struct kmem_cache;
>  struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
>  struct notifier_block;		/* in notifier.h */
>  struct iov_iter;		/* in uio.h */
> @@ -125,6 +126,21 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
>  }
>  #endif
>  
> +#ifndef arch_skip_va
> +static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
> +{
> +	return false;
> +}
> +#endif
> +
> +#ifndef arch_refine_vmap_space
> +static inline void arch_refine_vmap_space(struct rb_root *root,
> +					  struct list_head *head,
> +					  struct kmem_cache *cachep)
> +{
> +}
> +#endif
> +
>  /*
>   *	Highlevel APIs for driver use
>   */
> @@ -214,6 +230,14 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size,
>  void free_vm_area(struct vm_struct *area);
>  extern struct vm_struct *remove_vm_area(const void *addr);
>  extern struct vm_struct *find_vm_area(const void *addr);
> +extern void insert_vmap_area_augment(struct vmap_area *va, struct rb_node *from,
> +				     struct rb_root *root,
> +				     struct list_head *head);
> +extern int va_clip(struct rb_root *root, struct list_head *head,
> +		   struct vmap_area *va, unsigned long nva_start_addr,
> +		   unsigned long size);
> +extern struct vmap_area *__find_vmap_area(unsigned long addr,
> +					  struct rb_root *root);
>
To me it looks like you want to make internal functions public for
everyone, which is not good, imho.

>  struct vmap_area *find_vmap_area(unsigned long addr);
>  
>  static inline bool is_vm_area_hugepages(const void *addr)
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 68fa001648cc..de4577a3708e 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -989,7 +989,7 @@ unsigned long vmalloc_nr_pages(void)
>  	return atomic_long_read(&nr_vmalloc_pages);
>  }
>  
> -static struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
> +struct vmap_area *__find_vmap_area(unsigned long addr, struct rb_root *root)
>  {
>  	struct rb_node *n = root->rb_node;
>  
> @@ -1322,7 +1322,7 @@ insert_vmap_area(struct vmap_area *va,
>  		link_va(va, root, parent, link, head);
>  }
>  
> -static void
> +void
>  insert_vmap_area_augment(struct vmap_area *va,
>  	struct rb_node *from, struct rb_root *root,
>  	struct list_head *head)
> @@ -1501,7 +1501,7 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
>  				vstart < va->va_start) {
>  			node = node->rb_left;
>  		} else {
> -			if (is_within_this_va(va, size, align, vstart))
> +			if (!arch_skip_va(va, vstart) && is_within_this_va(va, size, align, vstart))
>  				return va;
>  
>  			/*
> @@ -1522,7 +1522,8 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
>  			 */
>  			while ((node = rb_parent(node))) {
>  				va = rb_entry(node, struct vmap_area, rb_node);
> -				if (is_within_this_va(va, size, align, vstart))
> +				if (!arch_skip_va(va, vstart) &&
> +				    is_within_this_va(va, size, align, vstart))
>  					return va;
>  
>  				if (get_subtree_max_size(node->rb_right) >= length &&
> @@ -1554,7 +1555,7 @@ find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
>  	struct vmap_area *va;
>  
>  	list_for_each_entry(va, head, list) {
> -		if (!is_within_this_va(va, size, align, vstart))
> +		if (arch_skip_va(va, vstart) || !is_within_this_va(va, size, align, vstart))
>  			continue;
>  
arch_skip_va() injections into the search algorithm sound like a hack
and might lead (if I do not miss something; need to check closer) to
alloc failures when we go toward a reserved VA that we are not allowed
to allocate from.

>  		return va;
> @@ -1617,7 +1618,7 @@ classify_va_fit_type(struct vmap_area *va,
>  	return type;
>  }
>  
> -static __always_inline int
> +__always_inline int
>  va_clip(struct rb_root *root, struct list_head *head,
>  		struct vmap_area *va, unsigned long nva_start_addr,
>  		unsigned long size)
> @@ -5129,4 +5130,7 @@ void __init vmalloc_init(void)
>  	vmap_node_shrinker->count_objects = vmap_node_shrink_count;
>  	vmap_node_shrinker->scan_objects = vmap_node_shrink_scan;
>  	shrinker_register(vmap_node_shrinker);
> +
> +	arch_refine_vmap_space(&free_vmap_area_root, &free_vmap_area_list,
> +			       vmap_area_cachep);
>  }
>
Why do you not just allocate using a specific range from MODULES_ASLR_START
till VMALLOC_END?

Thanks!

--
Uladzislau Rezki

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc
  2024-04-18  8:55   ` [PATCH 1/5] " Uladzislau Rezki
@ 2024-04-18 15:52     ` Maxwell Bland
  0 siblings, 0 replies; 18+ messages in thread
From: Maxwell Bland @ 2024-04-18 15:52 UTC (permalink / raw)
  To: Uladzislau Rezki
  Cc: linux-mm, linux-kernel, Andrew Morton, Christoph Hellwig,
	Lorenzo Stoakes

On Thu, April 18, 2024 at 3:55 AM, Uladzislau Rezki wrote:
> On Tue, Apr 02, 2024 at 03:15:01PM -0500, Maxwell Bland wrote:
> > +extern void insert_vmap_area_augment(struct vmap_area *va, struct rb_node
> > +extern int va_clip(struct rb_root *root, struct list_head *head, +extern
> > struct vmap_area *__find_vmap_area(unsigned long addr,
> To me it looks like you want to make internal functions as public for
> everyone which is not good, imho.

First, thank you for the feedback. I wrestled with some of these ideas myself
while writing. I will clarify the motivations below and then propose some
alternatives based on your review.

> Injecting arch_skip_va() into the search algorithm sounds like a hack, and
> (if I am not missing something; I need to check closer) it might lead to
> allocation failures when the search walks toward a reserved VA that we are
> not allowed to allocate from.

This is a good insight into the architectural intention here. The underlying
goal of this patch is to give architectures a method for enforcing their own
pseudo-reserved vmalloc regions dynamically. A minimal sketch of what such a
hook might look like on the architecture side follows.
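(All names here are illustrative: code_region_start/code_region_end stand in
for whatever bounds an architecture reserves. This sketches the hook's
intent, not the actual arm64 implementation in this series.)

#define arch_skip_va arch_skip_va
static inline bool arch_skip_va(struct vmap_area *va, unsigned long vstart)
{
	/*
	 * Code allocations (identified by their search start) must stay
	 * inside the reserved region; all other allocations must skip
	 * any free area overlapping it.
	 */
	if (vstart == code_region_start)
		return va->va_start < code_region_start ||
		       va->va_end > code_region_end;
	return va->va_end > code_region_start &&
	       va->va_start < code_region_end;
}

Note that a free area straddling a region boundary is skipped wholesale
here, which is precisely where the failure mode you describe can arise.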

With this considered, the potential failures you highlight would technically
be legitimate, with the caveat that architectures implementing the interface
become responsible for maintaining only correct and appropriate reservations.

If so, the path forward depends on whether we believe that caveat is
reasonable. I am on the fence about whether that much freedom is good here,
so I think it is reasonable to disallow it; see the alternatives below.

> Why do you not just allocate from a specific range, from MODULES_ASLR_START
> to VMALLOC_END?

Mark Rutland has indicated that he does not support a large reduction in free
region size in exchange for ensuring pages are not interleaved. That was in
fact my initial approach, but it was deemed unfit: strict partitioning trades
region size against ASLR randomization.

To clarify a secondary point, in case the question was more general: allowing
the VMALLOC_START to VMALLOC_END and MODULES_ASLR_START to MODULES_ASLR_END
regions to interleave breaks a key use case, namely the ability to enforce
new PMD-level, coarse-grained protections (e.g. PXNTable) dynamically. A
sketch of that mechanism follows.
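(A rough sketch of the mechanism, not code from this series: arm64 table
descriptors carry a PXNTable bit, PMD_TABLE_PXN, which revokes privileged
execution for everything beneath the PMD in one shot.)

static inline void pmd_populate_kernel_data(struct mm_struct *mm,
					    pmd_t *pmdp, pte_t *ptep)
{
	/*
	 * PMD_TABLE_PXN makes every PTE under this PMD
	 * privileged-execute-never, regardless of the individual PTE
	 * permission bits, so data-only PMDs need no per-PTE checks.
	 */
	__pmd_populate(pmdp, __pa(ptep),
		       PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_PXN);
}

This only works if no code page can ever appear under such a PMD, hence the
need for strict code/data partitioning of the vmalloc space.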

In case the question is more of a "why are you submitting this in the first
place": non-interleaving simplifies code focused on preventing malicious page
table updates, since we do not need to track all updates to PTE-level
descriptors. Verifying individual PTE updates comes at a high cost in
performance and complexity, and happens to lead to hardware-level
privilege-checking race conditions on certain very popular arm64 chipsets.

OK, preamble out of the way:

(1) Would it be OK to export a more generic version of the functions written
in arch/arm64/kernel/vmalloc.c for

https://lore.kernel.org/all/20240416122254.868007168-3-mbland@motorola.com/

That is, move a version of these functions into the main mm/vmalloc.c, so
that they remain owned by the right part of the kernel? A rough sketch of
what that interface might look like is below.
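(Purely hypothetical name and shape, glossing over lookup corner cases, just
to illustrate "generic and owned by mm/vmalloc.c":)

/* Carve [start, start + size) out of the free tree so generic
 * allocations skip it, without the arch touching rb-tree internals. */
int vmalloc_reserve_range(unsigned long start, unsigned long size)
{
	struct vmap_area *va;
	int ret = -ENOENT;

	spin_lock(&free_vmap_area_lock);
	va = __find_vmap_area(start, &free_vmap_area_root);
	if (va)
		ret = va_clip(&free_vmap_area_root, &free_vmap_area_list,
			      va, start, size);
	spin_unlock(&free_vmap_area_lock);
	return ret;
}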

Or (2) the exported functions could effectively be duplicated into
architecture-specific code, going "all in" on the caveat mentioned above:
architectures become responsible for maintaining a reserved code region if
they choose to implement the interface.

(3) A different approach is also in mind, one that does not involve skipping
the allocation of "bad" VAs but instead dynamically restructures the tree,
potentially just creating two trees, one for data and one for code; a sketch
follows.
Thanks and Regards,
Maxwell Bland


end of thread

Thread overview: 18+ messages
2024-04-15 20:16 [PATCH 0/5] mm: code and data partitioning improvements Maxwell Bland
2024-04-02 20:15 ` [PATCH 1/5] mm: allow arch refinement/skip for vmap alloc Maxwell Bland
2024-04-16 19:18   ` [PATCH 1/5 RESEND] " Maxwell Bland
2024-04-18  8:55   ` [PATCH 1/5] " Uladzislau Rezki
2024-04-18 15:52     ` Maxwell Bland
2024-04-03 21:08 ` [PATCH 2/5] arm64: mm: code and data partitioning for aslr Maxwell Bland
2024-04-16 19:18   ` [PATCH 2/5 RESEND] " Maxwell Bland
2024-04-17  5:14   ` [PATCH 2/5] " kernel test robot
2024-04-15 19:51 ` [PATCH 5/5] ptdump: add state parameter for non-leaf callback Maxwell Bland
2024-04-16 19:18   ` [PATCH 5/5 RESEND] " Maxwell Bland
2024-04-16 20:11   ` [PATCH 5/5] " Andrew Morton
2024-04-16 21:01     ` Maxwell Bland
2024-04-16 19:18 ` [PATCH 0/5 RESEND] mm: code and data partitioning improvements Maxwell Bland
2024-04-16 19:18 ` [PATCH 3/5 RESEND] mm: add vaddr param to pmd_populate_kernel Maxwell Bland
2024-04-05 18:37   ` [PATCH 3/5] " Maxwell Bland
2024-04-16 19:18 ` [PATCH 4/5 RESEND] arm64: dynamic enforcement of PXNTable Maxwell Bland
2024-04-12 15:00   ` [PATCH 4/5] " Maxwell Bland
2024-04-17  6:37   ` [PATCH 4/5 RESEND] " kernel test robot
