From: Ard Biesheuvel <ardb@kernel.org>
To: linux@armlinux.org.uk, linux-arm-kernel@lists.infradead.org
Cc: linux-hardening@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	Arnd Bergmann <arnd@arndb.de>, Kees Cook <keescook@chromium.org>,
	Keith Packard <keithpac@amazon.com>,
	Linus Walleij <linus.walleij@linaro.org>,
	Nick Desaulniers <ndesaulniers@google.com>,
	Marc Zyngier <maz@kernel.org>
Subject: [PATCH v6 5/8] ARM: mm: make vmalloc_seq handling SMP safe
Date: Tue, 25 Jan 2022 10:14:50 +0100	[thread overview]
Message-ID: <20220125091453.1475246-6-ardb@kernel.org> (raw)
In-Reply-To: <20220125091453.1475246-1-ardb@kernel.org>

Rework the vmalloc_seq handling so it can be used safely under SMP. We
started using it to ensure that vmap'ed stacks are guaranteed to be
mapped by the active mm before switching to a task, and for that to
work, changes to the page tables must be visible to other CPUs by the
time they observe a change in the sequence count.
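
As an aside for readers unfamiliar with the idiom (an illustration only,
not part of the patch): the pattern is a sequence counter published with
a store-release, plus a reader that copies the data and retries if the
counter moved underneath it. A minimal userspace sketch using C11
atomics follows; every name in it (global_table, global_seq, fake_mm,
publish_update, sync_table) is a hypothetical stand-in for init_mm's PGD
slice and the per-mm vmalloc_seq counters, and the acquire loads are
there for portable C11 ordering, whereas the kernel hunks below read the
counter with a plain atomic_read().

#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

#define NENTRIES 4

/* Stand-ins for init_mm's PGD entries and init_mm.context.vmalloc_seq. */
static unsigned long global_table[NENTRIES];
static atomic_int global_seq;

/* Stand-in for a task's mm: a private copy of the table plus its own seq. */
struct fake_mm {
	unsigned long table[NENTRIES];
	atomic_int seq;
};

/*
 * Writer: update the shared table, then bump the counter with release
 * semantics so that anyone observing the new counter value also sees
 * the table update.
 */
static void publish_update(int idx, unsigned long val)
{
	global_table[idx] = val;
	atomic_fetch_add_explicit(&global_seq, 1, memory_order_release);
}

/*
 * Reader: copy the shared table, publish the observed counter value with
 * a store-release, and retry if the shared counter moved in the meantime.
 */
static void sync_table(struct fake_mm *mm)
{
	int seq;

	do {
		seq = atomic_load_explicit(&global_seq, memory_order_acquire);
		memcpy(mm->table, global_table, sizeof(global_table));
		atomic_store_explicit(&mm->seq, seq, memory_order_release);
	} while (seq != atomic_load_explicit(&global_seq, memory_order_acquire));
}

int main(void)
{
	static struct fake_mm mm;	/* zero-initialized */

	publish_update(2, 0xabcdUL);	/* writer side */
	sync_table(&mm);		/* reader side */
	printf("seq=%d entry=%lx\n", atomic_load(&mm.seq), mm.table[2]);
	return 0;
}

The retry loop is the interesting part: the reader's own counter only
becomes visible (via the release store) together with a copy that is at
least as new as the value it advertises, which is the property the
vmap'ed stack switch relies on.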

Since LPAE needs none of this, fold a check against it into the
vmalloc_seq counter check after breaking it out into a separate static
inline helper.

Given that vmap'ed stacks are now also supported on !SMP configurations,
let's drop the WARN() that could now fire spuriously.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/mmu.h         |  2 +-
 arch/arm/include/asm/mmu_context.h | 22 +++++++++++++++--
 arch/arm/include/asm/page.h        |  3 +--
 arch/arm/kernel/traps.c            | 25 ++++++--------------
 arch/arm/mm/context.c              |  3 +--
 arch/arm/mm/ioremap.c              | 18 ++++++++------
 6 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index 1592a4264488..e049723840d3 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -10,7 +10,7 @@ typedef struct {
 #else
 	int		switch_pending;
 #endif
-	unsigned int	vmalloc_seq;
+	atomic_t	vmalloc_seq;
 	unsigned long	sigpage;
 #ifdef CONFIG_VDSO
 	unsigned long	vdso;
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index 84e58956fcab..db2cb06aa8cf 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -23,6 +23,16 @@
 
 void __check_vmalloc_seq(struct mm_struct *mm);
 
+#ifdef CONFIG_MMU
+static inline void check_vmalloc_seq(struct mm_struct *mm)
+{
+	if (!IS_ENABLED(CONFIG_ARM_LPAE) &&
+	    unlikely(atomic_read(&mm->context.vmalloc_seq) !=
+		     atomic_read(&init_mm.context.vmalloc_seq)))
+		__check_vmalloc_seq(mm);
+}
+#endif
+
 #ifdef CONFIG_CPU_HAS_ASID
 
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
@@ -52,8 +62,7 @@ static inline void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
 static inline void check_and_switch_context(struct mm_struct *mm,
 					    struct task_struct *tsk)
 {
-	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
-		__check_vmalloc_seq(mm);
+	check_vmalloc_seq(mm);
 
 	if (irqs_disabled())
 		/*
@@ -129,6 +138,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #endif
 }
 
+#ifdef CONFIG_VMAP_STACK
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+	if (mm != &init_mm)
+		check_vmalloc_seq(mm);
+}
+#define enter_lazy_tlb enter_lazy_tlb
+#endif
+
 #include <asm-generic/mmu_context.h>
 
 #endif
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 7b871ed99ccf..5fcc8a600e36 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -147,11 +147,10 @@ extern void copy_page(void *to, const void *from);
 #include <asm/pgtable-3level-types.h>
 #else
 #include <asm/pgtable-2level-types.h>
-#endif
-
 #ifdef CONFIG_VMAP_STACK
 #define ARCH_PAGE_TABLE_SYNC_MASK	PGTBL_PMD_MODIFIED
 #endif
+#endif
 
 #endif /* CONFIG_MMU */
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 3f38357efc46..08612032aefe 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -885,6 +885,7 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
 	die("kernel stack overflow", regs, 0);
 }
 
+#ifndef CONFIG_ARM_LPAE
 /*
  * Normally, we rely on the logic in do_translation_fault() to update stale PMD
  * entries covering the vmalloc space in a task's page tables when it first
@@ -895,26 +896,14 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
  * So we need to ensure that these PMD entries are up to date *before* the MM
  * switch. As we already have some logic in the MM switch path that takes care
  * of this, let's trigger it by bumping the counter every time the core vmalloc
- * code modifies a PMD entry in the vmalloc region.
+ * code modifies a PMD entry in the vmalloc region. Use release semantics on
+ * the store so that other CPUs observing the counter's new value are
+ * guaranteed to see the updated page table entries as well.
  */
 void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
 {
-	if (start > VMALLOC_END || end < VMALLOC_START)
-		return;
-
-	/*
-	 * This hooks into the core vmalloc code to receive notifications of
-	 * any PMD level changes that have been made to the kernel page tables.
-	 * This means it should only be triggered once for every MiB worth of
-	 * vmalloc space, given that we don't support huge vmalloc/vmap on ARM,
-	 * and that kernel PMD level table entries are rarely (if ever)
-	 * updated.
-	 *
-	 * This means that the counter is going to max out at ~250 for the
-	 * typical case. If it overflows, something entirely unexpected has
-	 * occurred so let's throw a warning if that happens.
-	 */
-	WARN_ON(++init_mm.context.vmalloc_seq == UINT_MAX);
+	if (start < VMALLOC_END && end > VMALLOC_START)
+		atomic_inc_return_release(&init_mm.context.vmalloc_seq);
 }
-
+#endif
 #endif
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 48091870db89..4204ffa2d104 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -240,8 +240,7 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
 	unsigned int cpu = smp_processor_id();
 	u64 asid;
 
-	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
-		__check_vmalloc_seq(mm);
+	check_vmalloc_seq(mm);
 
 	/*
 	 * We cannot update the pgd and the ASID atomicly with classic
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 6e830b9418c9..8963c8c63471 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -117,16 +117,21 @@ EXPORT_SYMBOL(ioremap_page);
 
 void __check_vmalloc_seq(struct mm_struct *mm)
 {
-	unsigned int seq;
+	int seq;
 
 	do {
-		seq = init_mm.context.vmalloc_seq;
+		seq = atomic_read(&init_mm.context.vmalloc_seq);
 		memcpy(pgd_offset(mm, VMALLOC_START),
 		       pgd_offset_k(VMALLOC_START),
 		       sizeof(pgd_t) * (pgd_index(VMALLOC_END) -
 					pgd_index(VMALLOC_START)));
-		mm->context.vmalloc_seq = seq;
-	} while (seq != init_mm.context.vmalloc_seq);
+		/*
+		 * Use a store-release so that other CPUs that observe the
+		 * counter's new value are guaranteed to see the results of the
+		 * memcpy as well.
+		 */
+		atomic_set_release(&mm->context.vmalloc_seq, seq);
+	} while (seq != atomic_read(&init_mm.context.vmalloc_seq));
 }
 
 #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
@@ -157,7 +162,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
 			 * Note: this is still racy on SMP machines.
 			 */
 			pmd_clear(pmdp);
-			init_mm.context.vmalloc_seq++;
+			atomic_inc_return_release(&init_mm.context.vmalloc_seq);
 
 			/*
 			 * Free the page table, if there was one.
@@ -174,8 +179,7 @@ static void unmap_area_sections(unsigned long virt, unsigned long size)
 	 * Ensure that the active_mm is up to date - we want to
 	 * catch any use-after-iounmap cases.
 	 */
-	if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)
-		__check_vmalloc_seq(current->active_mm);
+	check_vmalloc_seq(current->active_mm);
 
 	flush_tlb_kernel_range(virt, end);
 }
-- 
2.30.2


Thread overview: 20+ messages
2022-01-25  9:14 [PATCH v6 0/8] ARM vmap'ed and IRQ stacks roundup Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 1/8] ARM: mm: switch to swapper_pg_dir early for vmap'ed stack Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 2/8] ARM: assembler: define a Kconfig symbol for group relocation support Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 3/8] ARM: smp: elide HWCAP_TLS checks or __entry_task updates on SMP+v6 Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 4/8] ARM: entry: avoid clobbering R9 in IRQ handler Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 5/8] ARM: mm: make vmalloc_seq handling SMP safe Ard Biesheuvel [this message]
2022-01-25  9:14 ` [PATCH v6 6/8] ARM: iop: make iop_handle_irq() static Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 7/8] ARM: drop pointless SMP check on secondary startup path Ard Biesheuvel
2022-01-25  9:14 ` [PATCH v6 8/8] ARM: make get_current() and __my_cpu_offset() __always_inline Ard Biesheuvel
2022-01-25 20:48   ` Nick Desaulniers
