All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 1/3] powerpc/64s: Fix system call emulation
@ 2022-01-17 14:24 Nicholas Piggin
  2022-01-17 14:24 ` [RFC PATCH 2/3] powerpc/64s: Emulate scv syscalls if facility unavailable and PR KVM is possible Nicholas Piggin
  2022-01-17 14:24 ` [RFC PATCH 3/3] KVM: PPC: Book3S PR: Disable SCV when running AIL is disabled Nicholas Piggin
  0 siblings, 2 replies; 3+ messages in thread
From: Nicholas Piggin @ 2022-01-17 14:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater, Nicholas Piggin

Interrupt return code expects the returned-to irq state to be reconciled
with the returned-to MSR[EE] state. That is, if local irqs are enabled
then MSR[EE] must be set, and if MSR[EE] is not set then local irqs must
be disabled.

System call emulation (both sc and scv 0) does not get this right, it
tries to return to a context with MSR[EE]=0 and local irqs enabled. This
confuses interrupt return and triggers a warning and probably worse.

Fix this by returning to the system call entry points with MSR[EE]=0 and
local irqs disabled. Add a case to deal with this kind of entry in the
system call handler. The difficulty is that the interrupt return changes
from user-mode to kernel mode, so it does not restore everything to the
way it would look when coming from userspace (e.g., CPU accounting, kuap,
the pkey regs, etc).

XXX: I don't know if this is quite the best problem. System call emulation
is much more complicated than it looks due to this return-to-kernel problem.
Even now the patch relies on SOFTE being set in the stack by the interrupt
return reading back the same way by the system call handler that creates
a new stack at the same position (this is how it determines it was an
emulated syscall). Not only that but suspect the IAMR is not being restored
correctly here and the correct user value on the stack gets clobbered.

Better option might be to have per-thread data that sets an emulated
syscall required flag and saves certain things like iamr. Or possibly just
bite the bullet and create new entry points for syscall emulation.
---
 arch/powerpc/kernel/interrupt.c    | 35 ++++++++++++++++++++----------
 arch/powerpc/kernel/interrupt_64.S | 10 ---------
 arch/powerpc/lib/sstep.c           |  9 +++++---
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 7cd6ce3ec423..e73ad5842cb0 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -81,10 +81,31 @@ notrace long system_call_exception(long r3, long r4, long r5,
 {
 	syscall_fn f;
 
-	kuap_lock();
-
 	regs->orig_gpr3 = r3;
 
+	if (IS_ENABLED(CONFIG_PPC64) &&
+			unlikely(arch_irq_disabled_regs(regs))) {
+		irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
+		/*
+		 * The first stack frame entry will have IRQS_ENABLED except
+		 * in the case of syscall emulation, where the syscall entry
+		 * code is returned-to with MSR[EE] disabled, which requires
+		 * regs->softe is IRQS_DISABLED to avoid triggering the
+		 * interrupt return code warning for returning to local irqs
+		 * enabled but MSR[EE]=0. Not a big deal to re-set it here.
+		 */
+#ifdef CONFIG_PPC_BOOK3S_64
+		set_kuap(AMR_KUAP_BLOCKED);
+#endif
+		if (trap_is_scv(regs))
+			local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+		/* XXX: pkey save? Did we save the wrong values in stack
+		 * from userspace now? */
+		goto skip_user_entry;
+	}
+
+	kuap_lock();
+
 	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
 		BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
 
@@ -95,7 +116,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
 
 	BUG_ON(regs_is_unrecoverable(regs));
 	BUG_ON(!(regs->msr & MSR_PR));
-	BUG_ON(arch_irq_disabled_regs(regs));
 
 #ifdef CONFIG_PPC_PKEY
 	if (mmu_has_feature(MMU_FTR_PKEY)) {
@@ -129,14 +149,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
 
 	account_stolen_time();
 
-	/*
-	 * This is not required for the syscall exit path, but makes the
-	 * stack frame look nicer. If this was initialised in the first stack
-	 * frame, or if the unwinder was taught the first stack frame always
-	 * returns to user with IRQS_ENABLED, this store could be avoided!
-	 */
-	irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
-
+skip_user_entry:
 	/*
 	 * If system call is called with TM active, set _TIF_RESTOREALL to
 	 * prevent RFSCV being used to return to userspace, because POWER9
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 7bab2d7de372..6471034c7909 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -219,16 +219,6 @@ system_call_vectored common 0x3000
  */
 system_call_vectored sigill 0x7ff0
 
-
-/*
- * Entered via kernel return set up by kernel/sstep.c, must match entry regs
- */
-	.globl system_call_vectored_emulate
-system_call_vectored_emulate:
-_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
-	li	r10,IRQS_ALL_DISABLED
-	stb	r10,PACAIRQSOFTMASK(r13)
-	b	system_call_vectored_common
 #endif /* CONFIG_PPC_BOOK3S */
 
 	.balign IFETCH_ALIGN_BYTES
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a94b0cd0bdc5..62d3fd925dde 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -16,7 +16,7 @@
 #include <asm/disassemble.h>
 
 extern char system_call_common[];
-extern char system_call_vectored_emulate[];
+extern char system_call_vectored_common[];
 
 #ifdef CONFIG_PPC64
 /* Bits in SRR1 that are copied from MSR */
@@ -3667,6 +3667,8 @@ int emulate_step(struct pt_regs *regs, ppc_inst_t instr)
 		regs->gpr[11] = regs->nip + 4;
 		regs->gpr[12] = regs->msr & MSR_MASK;
 		regs->gpr[13] = (unsigned long) get_paca();
+		// Return code needs regs->softe to match regs->msr & MSR_EE
+		regs->softe = IRQS_ALL_DISABLED;
 		regs_set_return_ip(regs, (unsigned long) &system_call_common);
 		regs_set_return_msr(regs, MSR_KERNEL);
 		return 1;
@@ -3674,11 +3676,12 @@ int emulate_step(struct pt_regs *regs, ppc_inst_t instr)
 #ifdef CONFIG_PPC_BOOK3S_64
 	case SYSCALL_VECTORED_0:	/* scv 0 */
 		regs->gpr[9] = regs->gpr[13];
-		regs->gpr[10] = MSR_KERNEL;
 		regs->gpr[11] = regs->nip + 4;
 		regs->gpr[12] = regs->msr & MSR_MASK;
 		regs->gpr[13] = (unsigned long) get_paca();
-		regs_set_return_ip(regs, (unsigned long) &system_call_vectored_emulate);
+		// Return code needs regs->softe to match regs->msr & MSR_EE
+		regs->softe = IRQS_ALL_DISABLED;
+		regs_set_return_ip(regs, (unsigned long) &system_call_vectored_common);
 		regs_set_return_msr(regs, MSR_KERNEL);
 		return 1;
 #endif
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [RFC PATCH 2/3] powerpc/64s: Emulate scv syscalls if facility unavailable and PR KVM is possible
  2022-01-17 14:24 [RFC PATCH 1/3] powerpc/64s: Fix system call emulation Nicholas Piggin
@ 2022-01-17 14:24 ` Nicholas Piggin
  2022-01-17 14:24 ` [RFC PATCH 3/3] KVM: PPC: Book3S PR: Disable SCV when running AIL is disabled Nicholas Piggin
  1 sibling, 0 replies; 3+ messages in thread
From: Nicholas Piggin @ 2022-01-17 14:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater, Nicholas Piggin

The SCV facility will be disabled at runtime in the host by PR KVM by
the next change, emulate it in the facility unavailable handler.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/traps.c | 45 +++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 11741703d26e..6e4eaa6bf58d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -58,6 +58,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/rio.h>
 #include <asm/fadump.h>
+#include <asm/sstep.h>
 #include <asm/switch_to.h>
 #include <asm/tm.h>
 #include <asm/debug.h>
@@ -1778,6 +1779,49 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
 		return;
 	}
 
+	if (status == FSCR_SCV_LG) {
+		u32 lev;
+		ppc_inst_t instr;
+
+		/* If we didn't advertise the feature to userspace, SIGILL */
+		if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_SCV))
+			goto out_msg;
+
+		/*
+		 * PR KVM disables the FSCR[SCV] facility even if the rest of
+		 * the kernel and userspace thought SCV was enabled. THis
+		 * causes scv instructions executed by both the host and the
+		 * guest to come here, in their respective kernels.
+		 *
+		 * If radix is enabled then PR KVM can not be in use and can
+		 * not be our hypervisor, so treat this as a normal illegal
+		 * instruction. If we are hash guest or host, we might need
+		 * to emulate a legitimate scv interrupt here.
+		 */
+		if (radix_enabled())
+			goto out_msg;
+
+		/*
+		 * User is making SCV call with SCV disabled. Emulate it.
+		 */
+		if (get_user_instr(instr, (void __user *)regs->nip)) {
+			pr_err("Failed to fetch the user instruction\n");
+			return;
+		}
+
+		instword = ppc_inst_val(instr);
+		if (WARN_ON_ONCE((instword & 0xfffff01f) != 0x44000001))
+			goto out;
+
+		lev = (instword >> 5) & 0x7f;
+		if (lev == 0) {
+			if (emulate_step(regs, instr) > 0)
+				return;
+			else
+				pr_err_ratelimited("Failed to emulate SCV\n");
+		}
+	}
+
 	if (status == FSCR_TM_LG) {
 		/*
 		 * If we're here then the hardware is TM aware because it
@@ -1799,6 +1843,7 @@ DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
 		return;
 	}
 
+out_msg:
 	pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
 		hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [RFC PATCH 3/3] KVM: PPC: Book3S PR: Disable SCV when running AIL is disabled
  2022-01-17 14:24 [RFC PATCH 1/3] powerpc/64s: Fix system call emulation Nicholas Piggin
  2022-01-17 14:24 ` [RFC PATCH 2/3] powerpc/64s: Emulate scv syscalls if facility unavailable and PR KVM is possible Nicholas Piggin
@ 2022-01-17 14:24 ` Nicholas Piggin
  1 sibling, 0 replies; 3+ messages in thread
From: Nicholas Piggin @ 2022-01-17 14:24 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Cédric Le Goater, Nicholas Piggin

PR KVM does not support running with AIL enabled, and SCV does is not
supported with AIL disabled.

When PR KVM disables AIL on a system that has SCV enabled, the guest can
crash the host if it executes scv, or the host can crash itself if
another CPU executes scv while AIL is disabled (e.g., in the pseries
case).

Fix this by disabling the SCV facility when PR KVM disables AIL. The
facility unavailable handler will emulate it.

Alternatives considered and rejected:
- SCV support can not be disabled by PR KVM after boot, because it is
  advertised to userspace with HWCAP.
- AIL can not be disabled on a per-CPU basis. At least when running on
  pseries it is a per-LPAR setting.
- Support for real-mode SCV vectors will not be added because they are
  at 0x17000 so making such a large fixed head space causes immediate
  value limits to be exceeded, requiring a lot rework and more code.
- Disabling SCV for any PR KVM possible kernel will cause a slowdown
  when not using PR KVM.
- A boot time option to disable SCV to use PR KVM is user-hostile.

---
This is not well tested at the moment, and PR KVM would need to emulate
scv for the guest for a complete implementation. At least it should
prevent host crashes.

 arch/powerpc/kernel/exceptions-64s.S  |  5 +++
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 14 ++++++--
 arch/powerpc/kvm/book3s_pr.c          | 52 ++++++++++++++++++++++-----
 3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 55caeee37c08..9985d061f9bf 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -809,6 +809,11 @@ __start_interrupts:
  * - MSR_EE|MSR_RI is clear (no reentrant exceptions)
  * - Standard kernel environment is set up (stack, paca, etc)
  *
+ * KVM:
+ * These interrupts do not elevate HV 0->1, so HV is not involved. PR disables
+ * the FSCR[SCV] facility before running the guest so scv becomes a program
+ * interrupt and where it can be emulated by the OS.
+ *
  * Call convention:
  *
  * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index a28e5b3daabd..611dd34cf708 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -373,6 +373,12 @@ void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
 }
 EXPORT_SYMBOL_GPL(save_p9_host_os_sprs);
 
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+bool pr_kvm_disabled_reloc_exc(void);
+#else
+static inline bool pr_kvm_disabled_reloc_exc(void) { return false; }
+#endif
+
 /* vcpu guest regs must already be saved */
 void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
 			     struct p9_host_os_sprs *host_os_sprs)
@@ -395,8 +401,12 @@ void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
 		mtspr(SPRN_UAMOR, 0);
 	if (host_os_sprs->amr != vcpu->arch.amr)
 		mtspr(SPRN_AMR, host_os_sprs->amr);
-	if (current->thread.fscr != vcpu->arch.fscr)
-		mtspr(SPRN_FSCR, current->thread.fscr);
+	if (current->thread.fscr != vcpu->arch.fscr) {
+		if (pr_kvm_disabled_reloc_exc())
+			mtspr(SPRN_FSCR, current->thread.fscr & ~FSCR_SCV);
+		else
+			mtspr(SPRN_FSCR, current->thread.fscr);
+	}
 	if (current->thread.dscr != vcpu->arch.dscr)
 		mtspr(SPRN_DSCR, current->thread.dscr);
 	if (vcpu->arch.pspb != 0)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 6bc9425acb32..d608afb3376b 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -140,9 +140,12 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
 #endif
 
 	/* Disable AIL if supported */
-	if (cpu_has_feature(CPU_FTR_HVMODE) &&
-	    cpu_has_feature(CPU_FTR_ARCH_207S))
-		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		if (cpu_has_feature(CPU_FTR_ARCH_207S))
+			mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+		if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+			mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV);
+	}
 
 	vcpu->cpu = smp_processor_id();
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -175,9 +178,12 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
 	kvmppc_save_tm_pr(vcpu);
 
 	/* Enable AIL if supported */
-	if (cpu_has_feature(CPU_FTR_HVMODE) &&
-	    cpu_has_feature(CPU_FTR_ARCH_207S))
-		mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
+	if (cpu_has_feature(CPU_FTR_HVMODE)) {
+		if (cpu_has_feature(CPU_FTR_ARCH_207S))
+			mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
+		if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+			mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV);
+	}
 
 	vcpu->cpu = -1;
 }
@@ -1037,6 +1043,8 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
 
 void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
 {
+	if (fscr & FSCR_SCV)
+		fscr &= ~FSCR_SCV; /* SCV must not be enabled */
 	if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) {
 		/* TAR got dropped, drop it in shadow too */
 		kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
@@ -1990,6 +1998,21 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
 static unsigned int kvm_global_user_count = 0;
 static DEFINE_SPINLOCK(kvm_global_user_count_lock);
 
+bool pr_kvm_disabled_reloc_exc(void)
+{
+	return kvm_global_user_count != 0;
+}
+
+static void disable_scv(void *dummy)
+{
+	mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV);
+}
+
+static void enable_scv(void *dummy)
+{
+	mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV);
+}
+
 static int kvmppc_core_init_vm_pr(struct kvm *kvm)
 {
 	mutex_init(&kvm->arch.hpt_mutex);
@@ -2001,8 +2024,17 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm)
 
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		spin_lock(&kvm_global_user_count_lock);
-		if (++kvm_global_user_count == 1)
+		if (++kvm_global_user_count == 1) {
+			/*
+			 * FSCR isn't context switched except for KVM HV
+			 * entry/exit, so only have to care to keep that
+			 * part up to date.
+			 */
+			if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+				smp_call_function(disable_scv, NULL, 0);
+			/* SCV must be disabled first */
 			pseries_disable_reloc_on_exc();
+		}
 		spin_unlock(&kvm_global_user_count_lock);
 	}
 	return 0;
@@ -2017,8 +2049,12 @@ static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		spin_lock(&kvm_global_user_count_lock);
 		BUG_ON(kvm_global_user_count == 0);
-		if (--kvm_global_user_count == 0)
+		if (--kvm_global_user_count == 0) {
 			pseries_enable_reloc_on_exc();
+			/* reloc must be enabled irst */
+			if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+				smp_call_function(enable_scv, NULL, 0);
+		}
 		spin_unlock(&kvm_global_user_count_lock);
 	}
 }
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-01-17 14:26 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-17 14:24 [RFC PATCH 1/3] powerpc/64s: Fix system call emulation Nicholas Piggin
2022-01-17 14:24 ` [RFC PATCH 2/3] powerpc/64s: Emulate scv syscalls if facility unavailable and PR KVM is possible Nicholas Piggin
2022-01-17 14:24 ` [RFC PATCH 3/3] KVM: PPC: Book3S PR: Disable SCV when running AIL is disabled Nicholas Piggin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.