All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] powerpc/64s: ISAv3 initialize MMU registers before setting partition table
@ 2017-12-04  2:40 Nicholas Piggin
  2017-12-05  3:04 ` Michael Ellerman
  0 siblings, 1 reply; 3+ messages in thread
From: Nicholas Piggin @ 2017-12-04  2:40 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Nicholas Piggin, Aneesh Kumar K . V, Michael Ellerman,
	Benjamin Herrenschmidt, Paul Mackerras, Michael Neuling

kexec can leave MMU registers set when booting into a new kernel, PIDR
in particular. The boot sequence does not zero PIDR, so it only gets
set when CPUs first switch to a userspace processes (until then it's
running a kernel thread with effective PID = 0).

This leaves a window where a process table entry and page tables are
set up due to user processes running on other CPUs, that happen to
match with a stale PID. The CPU with that PID may cause speculative
accesses that address quadrant 0, which will result in cached
translations and PWC for that process, on a CPU which is not in the
mm_cpumask and so they will not get invalidated properly.

The most common result is the kernel hanging in infinite page fault
loops soon after kexec (usually in schedule_tail, which is usually the
first non-speculative quardant 0 access to a new PID) due to a stale
PWC. However being a stale translation erorr, it could result in
anything up to security and data corruption errors.

Fix this by zeroing out PIDR before setting PTCR.

LPIDR is also not initialized, and may cause a similar issue with
speculative access to quadrant 1/2. This has not been observed, but
LPIDR is cleared to prevent that possibility.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
Since v1:
- Aneesh noticed hash was not clearing LPID
- Improved changelog
- Improved comments

Please review.

Thanks,
Nick

 arch/powerpc/mm/hash_utils_64.c | 19 +++++++++++++++++--
 arch/powerpc/mm/pgtable-radix.c | 10 ++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 655a5a9a183d..d9f3a72f3981 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1029,13 +1029,24 @@ void __init hash__early_init_mmu(void)
 	pci_io_base = ISA_IO_BASE;
 #endif
 
+	/*
+	 * kexec kernels can boot into the new kernel with PID and LPID
+	 * registers non-zero. Zero them to prevent speculative accesses
+	 * setting up cached translations when we turn the MMU on and
+	 * process/partition table entries start being added.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		mtspr(SPRN_PID, 0);
+
 	/* Select appropriate backend */
 	if (firmware_has_feature(FW_FEATURE_PS3_LV1))
 		ps3_early_mm_init();
 	else if (firmware_has_feature(FW_FEATURE_LPAR))
 		hpte_init_pseries();
-	else if (IS_ENABLED(CONFIG_PPC_NATIVE))
+	else if (IS_ENABLED(CONFIG_PPC_NATIVE)) {
+		mtspr(SPRN_LPID, 0);
 		hpte_init_native();
+	}
 
 	if (!mmu_hash_ops.hpte_insert)
 		panic("hash__early_init_mmu: No MMU hash ops defined!\n");
@@ -1055,11 +1066,15 @@ void __init hash__early_init_mmu(void)
 void hash__early_init_mmu_secondary(void)
 {
 	/* Initialize hash table for that CPU */
-	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		mtspr(SPRN_PID, 0);
 
+	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
 		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
 			update_hid_for_hash();
 
+		mtspr(SPRN_LPID, 0);
+
 		if (!cpu_has_feature(CPU_FTR_ARCH_300))
 			mtspr(SPRN_SDR1, _SDR1);
 		else
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index cfbbee941a76..0c13355ddc2a 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -563,10 +563,18 @@ void __init radix__early_init_mmu(void)
 	__pte_frag_nr = H_PTE_FRAG_NR;
 	__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
 
+	/*
+	 * kexec kernels can boot into the new kernel with PID and LPID
+	 * registers non-zero. Zero them to prevent speculative accesses
+	 * setting up cached translations when we turn the MMU on and
+	 * process/partition table entries start being added.
+	 */
+	mtspr(SPRN_PID, 0);
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
 		radix_init_native();
 		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
 			update_hid_for_radix();
+		mtspr(SPRN_LPID, 0);
 		lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
 		radix_init_partition_table();
@@ -587,10 +595,12 @@ void radix__early_init_mmu_secondary(void)
 	/*
 	 * update partition table control register and UPRT
 	 */
+	mtspr(SPRN_PID, 0);
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
 
 		if (cpu_has_feature(CPU_FTR_POWER9_DD1))
 			update_hid_for_radix();
+		mtspr(SPRN_LPID, 0);
 
 		lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
-- 
2.15.0

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] powerpc/64s: ISAv3 initialize MMU registers before setting partition table
  2017-12-04  2:40 [PATCH v2] powerpc/64s: ISAv3 initialize MMU registers before setting partition table Nicholas Piggin
@ 2017-12-05  3:04 ` Michael Ellerman
  2017-12-05  5:53   ` Nicholas Piggin
  0 siblings, 1 reply; 3+ messages in thread
From: Michael Ellerman @ 2017-12-05  3:04 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev
  Cc: Nicholas Piggin, Aneesh Kumar K . V, Benjamin Herrenschmidt,
	Paul Mackerras, Michael Neuling

Hi Nick,

Sorry I didn't reply sooner.

Nicholas Piggin <npiggin@gmail.com> writes:

> kexec can leave MMU registers set when booting into a new kernel, PIDR
> in particular. The boot sequence does not zero PIDR, so it only gets
> set when CPUs first switch to a userspace processes (until then it's
> running a kernel thread with effective PID = 0).
>
> This leaves a window where a process table entry and page tables are
> set up due to user processes running on other CPUs, that happen to
> match with a stale PID. The CPU with that PID may cause speculative
> accesses that address quadrant 0, which will result in cached
> translations and PWC for that process, on a CPU which is not in the
> mm_cpumask and so they will not get invalidated properly.
>
> The most common result is the kernel hanging in infinite page fault
> loops soon after kexec (usually in schedule_tail, which is usually the
> first non-speculative quardant 0 access to a new PID) due to a stale
> PWC. However being a stale translation erorr, it could result in
> anything up to security and data corruption errors.
>
> Fix this by zeroing out PIDR before setting PTCR.
>
> LPIDR is also not initialized, and may cause a similar issue with
> speculative access to quadrant 1/2. This has not been observed, but
> LPIDR is cleared to prevent that possibility.

Isn't LPID initialised in __setup_cpu_power9() and __restore_cpu_power9() ?

eg:

_GLOBAL(__setup_cpu_power9)
	mflr	r11
	bl	__init_FSCR
	bl	__init_PMU
	bl	__init_hvmode_206
	mtlr	r11
	beqlr
	li	r0,0
	mtspr	SPRN_PSSCR,r0
	mtspr	SPRN_LPID,r0


Similarly, shouldn't we be doing the PID initialisation there as well?

cheers

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] powerpc/64s: ISAv3 initialize MMU registers before setting partition table
  2017-12-05  3:04 ` Michael Ellerman
@ 2017-12-05  5:53   ` Nicholas Piggin
  0 siblings, 0 replies; 3+ messages in thread
From: Nicholas Piggin @ 2017-12-05  5:53 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linuxppc-dev, Aneesh Kumar K . V, Benjamin Herrenschmidt,
	Paul Mackerras, Michael Neuling

On Tue, 05 Dec 2017 14:04:42 +1100
Michael Ellerman <mpe@ellerman.id.au> wrote:

> Hi Nick,
> 
> Sorry I didn't reply sooner.
> 
> Nicholas Piggin <npiggin@gmail.com> writes:
> 
> > kexec can leave MMU registers set when booting into a new kernel, PIDR
> > in particular. The boot sequence does not zero PIDR, so it only gets
> > set when CPUs first switch to a userspace processes (until then it's
> > running a kernel thread with effective PID = 0).
> >
> > This leaves a window where a process table entry and page tables are
> > set up due to user processes running on other CPUs, that happen to
> > match with a stale PID. The CPU with that PID may cause speculative
> > accesses that address quadrant 0, which will result in cached
> > translations and PWC for that process, on a CPU which is not in the
> > mm_cpumask and so they will not get invalidated properly.
> >
> > The most common result is the kernel hanging in infinite page fault
> > loops soon after kexec (usually in schedule_tail, which is usually the
> > first non-speculative quardant 0 access to a new PID) due to a stale
> > PWC. However being a stale translation erorr, it could result in
> > anything up to security and data corruption errors.
> >
> > Fix this by zeroing out PIDR before setting PTCR.
> >
> > LPIDR is also not initialized, and may cause a similar issue with
> > speculative access to quadrant 1/2. This has not been observed, but
> > LPIDR is cleared to prevent that possibility.  
> 
> Isn't LPID initialised in __setup_cpu_power9() and __restore_cpu_power9() ?
> 
> eg:
> 
> _GLOBAL(__setup_cpu_power9)
> 	mflr	r11
> 	bl	__init_FSCR
> 	bl	__init_PMU
> 	bl	__init_hvmode_206
> 	mtlr	r11
> 	beqlr
> 	li	r0,0
> 	mtspr	SPRN_PSSCR,r0
> 	mtspr	SPRN_LPID,r0
> 
> 
> Similarly, shouldn't we be doing the PID initialisation there as well?

Hmm, yes I must have missed that! Yes that would be the best place to
put it.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2017-12-05  5:53 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-04  2:40 [PATCH v2] powerpc/64s: ISAv3 initialize MMU registers before setting partition table Nicholas Piggin
2017-12-05  3:04 ` Michael Ellerman
2017-12-05  5:53   ` Nicholas Piggin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.