LinuxPPC-Dev Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] powerpc/64: interrupt return in C
@ 2019-08-28  9:06 Nicholas Piggin
  2019-08-28  9:28 ` Christophe Leroy
  2019-09-08 10:50 ` Nicholas Piggin
  0 siblings, 2 replies; 4+ messages in thread
From: Nicholas Piggin @ 2019-08-28  9:06 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

This is a work in progress that goes on top of the syscalls in C patch.
It's not quite complete, 64e low level exit is not taken care of, and
the new return is hacked into the existing interrupt handlers pretty
quickly (e.g., full gprs handling is still ugly and could be cleaned),
but that code touches exception-64s.S which is under heavy modification
in parallel so I will rebase on top of that before polishing it properly.
I will also try to convert to more IS_ENABLED() for Christophe.

I guess syscall_64.c will change to interrupt_64.c with this.

Hopefully there is no fundamental problem with it, it replaces some
fairly horrific asm code with stack frame bouncing and reusing for
replay, and almost imcomprehensible maze of exit points and labels.

This is just a quick preview because it's booting, if anyone can spot
major issue with the approach would be good.

Thanks,
Nick
---
 .../powerpc/include/asm/book3s/64/kup-radix.h |   6 +
 arch/powerpc/kernel/entry_64.S                | 475 ++++--------------
 arch/powerpc/kernel/exceptions-64s.S          |  55 +-
 arch/powerpc/kernel/syscall_64.c              | 147 +++++-
 arch/powerpc/kernel/vector.S                  |   2 +-
 5 files changed, 288 insertions(+), 397 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index ef2e65ea8a73..62ff2509cf51 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -60,6 +60,12 @@
 #include <asm/mmu.h>
 #include <asm/ptrace.h>
 
+static inline void kuap_restore_amr(struct pt_regs *regs)
+{
+	if (mmu_has_feature(MMU_FTR_RADIX_KUAP))
+		mtspr(SPRN_AMR, regs->kuap);
+}
+
 static inline void kuap_check_amr(void)
 {
 #ifdef CONFIG_PPC_KUAP_DEBUG
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d2efd1a96487..f1e973789a2a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -16,6 +16,7 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <asm/cache.h>
 #include <asm/unistd.h>
 #include <asm/processor.h>
 #include <asm/page.h>
@@ -130,9 +131,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
 	mtspr	SPRN_SRR0,r4
 	mtspr	SPRN_SRR1,r5
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	std	r5,PACATMSCRATCH(r13)
-#endif
 	mtlr	r6
 
 	cmpdi	r3,0
@@ -197,6 +195,7 @@ tabort_syscall:
 	RFI_TO_USER
 	b	.	/* prevent speculative execution */
 #endif
+
 _GLOBAL(ret_from_fork)
 	bl	schedule_tail
 	REST_NVGPRS(r1)
@@ -436,409 +435,149 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	addi	r1,r1,SWITCH_FRAME_SIZE
 	blr
 
-	.align	7
-_GLOBAL(ret_from_except)
-	ld	r11,_TRAP(r1)
-	andi.	r0,r11,1
-	bne	ret_from_except_lite
-	REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
 	/*
-	 * Disable interrupts so that current_thread_info()->flags
-	 * can't change between when we test it and when we return
-	 * from the interrupt.
-	 */
-#ifdef CONFIG_PPC_BOOK3E
-	wrteei	0
-#else
-	li	r10,MSR_RI
-	mtmsrd	r10,1		  /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
+	 * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+	 * touched, AMR not set, no exit work created, then this can be used.
+	 */
+	.balign IFETCH_ALIGN_BYTES
+_GLOBAL(fast_interrupt_return)
+	ld	r4,_MSR(r1)
+	andi.	r0,r4,MSR_PR
+	bne	.Lfast_user_interrupt_return
+	andi.	r0,r4,MSR_RI
+	bne+	.Lfast_kernel_interrupt_return
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	unrecoverable_exception
+	b	. /* should not get here */
 
-	ld	r9, PACA_THREAD_INFO(r13)
-	ld	r3,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3E
-	ld	r10,PACACURRENT(r13)
-#endif /* CONFIG_PPC_BOOK3E */
-	ld	r4,TI_FLAGS(r9)
-	andi.	r3,r3,MSR_PR
-	beq	resume_kernel
-#ifdef CONFIG_PPC_BOOK3E
-	lwz	r3,(THREAD+THREAD_DBCR0)(r10)
-#endif /* CONFIG_PPC_BOOK3E */
+	.balign IFETCH_ALIGN_BYTES
+_GLOBAL(interrupt_return)
+	REST_NVGPRS(r1)
 
-	/* Check current_thread_info()->flags */
-	andi.	r0,r4,_TIF_USER_WORK_MASK
-	bne	1f
-#ifdef CONFIG_PPC_BOOK3E
-	/*
-	 * Check to see if the dbcr0 register is set up to debug.
-	 * Use the internal debug mode bit to do this.
-	 */
-	andis.	r0,r3,DBCR0_IDM@h
-	beq	restore
-	mfmsr	r0
-	rlwinm	r0,r0,0,~MSR_DE	/* Clear MSR.DE */
-	mtmsr	r0
-	mtspr	SPRN_DBCR0,r3
-	li	r10, -1
-	mtspr	SPRN_DBSR,r10
-	b	restore
-#else
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	restore_math
-	b	restore
-#endif
-1:	andi.	r0,r4,_TIF_NEED_RESCHED
-	beq	2f
-	bl	restore_interrupts
-	SCHEDULE_USER
-	b	ret_from_except_lite
-2:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	andi.	r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
-	bne	3f		/* only restore TM if nothing else to do */
+	.balign IFETCH_ALIGN_BYTES
+_GLOBAL(interrupt_return_lite)
+	ld	r4,_MSR(r1)
+	andi.	r0,r4,MSR_PR
+	beq	kernel_interrupt_return
+user_interrupt_return:
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	restore_tm_state
-	b	restore
-3:
-#endif
-	bl	save_nvgprs
-	/*
-	 * Use a non volatile GPR to save and restore our thread_info flags
-	 * across the call to restore_interrupts.
-	 */
-	mr	r30,r4
-	bl	restore_interrupts
-	mr	r4,r30
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	do_notify_resume
-	b	ret_from_except
-
-resume_kernel:
-	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
-	andis.	r8,r4,_TIF_EMULATE_STACK_STORE@h
-	beq+	1f
+	bl	interrupt_exit_user_prepare
+	cmpdi	r3,0
+	bne-	.Lrestore_nvgprs
 
-	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
+.Lfast_user_interrupt_return:
+	ld	r11,_NIP(r1)
+	ld	r12,_MSR(r1)
+BEGIN_FTR_SECTION
+	ld	r10,_PPR(r1)
+	mtspr	SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r12
 
-	ld	r3,GPR1(r1)
-	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
-	mr	r4,r1			/* src:  current exception frame */
-	mr	r1,r3			/* Reroute the trampoline frame to r1 */
+BEGIN_FTR_SECTION
+	stdcx.	r0,0,r1		/* to clear the reservation */
+FTR_SECTION_ELSE
+	ldarx	r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
-	/* Copy from the original to the trampoline. */
-	li	r5,INT_FRAME_SIZE/8	/* size: INT_FRAME_SIZE */
-	li	r6,0			/* start offset: 0 */
-	mtctr	r5
-2:	ldx	r0,r6,r4
-	stdx	r0,r6,r3
-	addi	r6,r6,8
-	bdnz	2b
-
-	/* Do real store operation to complete stdu */
-	ld	r5,GPR1(r1)
-	std	r8,0(r5)
-
-	/* Clear _TIF_EMULATE_STACK_STORE flag */
-	lis	r11,_TIF_EMULATE_STACK_STORE@h
-	addi	r5,r9,TI_FLAGS
-0:	ldarx	r4,0,r5
-	andc	r4,r4,r11
-	stdcx.	r4,0,r5
-	bne-	0b
-1:
-
-#ifdef CONFIG_PREEMPT
-	/* Check if we need to preempt */
-	andi.	r0,r4,_TIF_NEED_RESCHED
-	beq+	restore
-	/* Check that preempt_count() == 0 and interrupts are enabled */
-	lwz	r8,TI_PREEMPT(r9)
-	cmpwi	cr0,r8,0
-	bne	restore
-	ld	r0,SOFTE(r1)
-	andi.	r0,r0,IRQS_DISABLED
-	bne	restore
+	ld	r3,_CCR(r1)
+	ld	r4,_LINK(r1)
+	ld	r5,_CTR(r1)
+	ld	r6,_XER(r1)
+	li	r0,0
 
-	/*
-	 * Here we are preempting the current task. We want to make
-	 * sure we are soft-disabled first and reconcile irq state.
-	 */
-	RECONCILE_IRQ_STATE(r3,r4)
-	bl	preempt_schedule_irq
+	REST_4GPRS(7, r1)
+	REST_2GPRS(11, r1)
+	REST_GPR(13, r1)
 
-	/*
-	 * arch_local_irq_restore() from preempt_schedule_irq above may
-	 * enable hard interrupt but we really should disable interrupts
-	 * when we return from the interrupt, and so that we don't get
-	 * interrupted after loading SRR0/1.
-	 */
-#ifdef CONFIG_PPC_BOOK3E
-	wrteei	0
-#else
-	li	r10,MSR_RI
-	mtmsrd	r10,1		  /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPT */
+	mtcr	r3
+	mtlr	r4
+	mtctr	r5
+	mtspr	SPRN_XER,r6
 
-	.globl	fast_exc_return_irq
-fast_exc_return_irq:
-restore:
-	/*
-	 * This is the main kernel exit path. First we check if we
-	 * are about to re-enable interrupts
-	 */
-	ld	r5,SOFTE(r1)
-	lbz	r6,PACAIRQSOFTMASK(r13)
-	andi.	r5,r5,IRQS_DISABLED
-	bne	.Lrestore_irq_off
+	REST_4GPRS(2, r1)
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	RFI_TO_USER
+	b	.	/* prevent speculative execution */
 
-	/* We are enabling, were we already enabled ? Yes, just return */
-	andi.	r6,r6,IRQS_DISABLED
-	beq	cr0,.Ldo_restore
+.Lrestore_nvgprs:
+	REST_NVGPRS(r1)
+	b	.Lfast_user_interrupt_return
 
-	/*
-	 * We are about to soft-enable interrupts (we are hard disabled
-	 * at this point). We check if there's anything that needs to
-	 * be replayed first.
-	 */
-	lbz	r0,PACAIRQHAPPENED(r13)
-	cmpwi	cr0,r0,0
-	bne-	.Lrestore_check_irq_replay
+	.balign IFETCH_ALIGN_BYTES
+kernel_interrupt_return:
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	interrupt_exit_kernel_prepare
+	cmpdi	cr1,r3,0
 
-	/*
-	 * Get here when nothing happened while soft-disabled, just
-	 * soft-enable and move-on. We will hard-enable as a side
-	 * effect of rfi
-	 */
-.Lrestore_no_replay:
-	TRACE_ENABLE_INTS
-	li	r0,IRQS_ENABLED
-	stb	r0,PACAIRQSOFTMASK(r13);
+.Lfast_kernel_interrupt_return:
+	ld	r11,_NIP(r1)
+	ld	r12,_MSR(r1)
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r12
 
-	/*
-	 * Final return path. BookE is handled in a different file
-	 */
-.Ldo_restore:
-#ifdef CONFIG_PPC_BOOK3E
-	b	exception_return_book3e
-#else
-	/*
-	 * Clear the reservation. If we know the CPU tracks the address of
-	 * the reservation then we can potentially save some cycles and use
-	 * a larx. On POWER6 and POWER7 this is significantly faster.
-	 */
 BEGIN_FTR_SECTION
 	stdcx.	r0,0,r1		/* to clear the reservation */
 FTR_SECTION_ELSE
-	ldarx	r4,0,r1
+	ldarx	r0,0,r1
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
-	/*
-	 * Some code path such as load_up_fpu or altivec return directly
-	 * here. They run entirely hard disabled and do not alter the
-	 * interrupt state. They also don't use lwarx/stwcx. and thus
-	 * are known not to leave dangling reservations.
-	 */
-	.globl	fast_exception_return
-fast_exception_return:
-	ld	r3,_MSR(r1)
-	ld	r4,_CTR(r1)
-	ld	r0,_LINK(r1)
-	mtctr	r4
-	mtlr	r0
-	ld	r4,_XER(r1)
-	mtspr	SPRN_XER,r4
-
-	kuap_check_amr r5, r6
-
-	REST_8GPRS(5, r1)
-
-	andi.	r0,r3,MSR_RI
-	beq-	.Lunrecov_restore
+	ld	r3,_CCR(r1)
+	ld	r4,_LINK(r1)
+	ld	r5,_CTR(r1)
+	ld	r6,_XER(r1)
+	li	r0,0
 
-	/*
-	 * Clear RI before restoring r13.  If we are returning to
-	 * userspace and we take an exception after restoring r13,
-	 * we end up corrupting the userspace r13 value.
-	 */
-	li	r4,0
-	mtmsrd	r4,1
+	REST_4GPRS(7, r1)
+	REST_2GPRS(11, r1)
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	/* TM debug */
-	std	r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */
-#endif
-	/*
-	 * r13 is our per cpu area, only restore it if we are returning to
-	 * userspace the value stored in the stack frame may belong to
-	 * another CPU.
-	 */
-	andi.	r0,r3,MSR_PR
-	beq	1f
-BEGIN_FTR_SECTION
-	/* Restore PPR */
-	ld	r2,_PPR(r1)
-	mtspr	SPRN_PPR,r2
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-	ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
-	REST_GPR(13, r1)
+	bne-	cr1,1f /* emulate stack store */
+	mtcr	r3
+	mtlr	r4
+	mtctr	r5
+	mtspr	SPRN_XER,r6
 
 	/*
-	 * We don't need to restore AMR on the way back to userspace for KUAP.
-	 * The value of AMR only matters while we're in the kernel.
+	 * Leaving a stale exception_marker on the stack can confuse
+	 * the reliable stack unwinder later on. Clear it.
 	 */
-	mtspr	SPRN_SRR1,r3
+	std	r0,STACK_FRAME_OVERHEAD-16(r1)
 
-	ld	r2,_CCR(r1)
-	mtcrf	0xFF,r2
-	ld	r2,_NIP(r1)
-	mtspr	SPRN_SRR0,r2
-
-	ld	r0,GPR0(r1)
-	ld	r2,GPR2(r1)
-	ld	r3,GPR3(r1)
-	ld	r4,GPR4(r1)
-	ld	r1,GPR1(r1)
-	RFI_TO_USER
+	REST_4GPRS(2, r1)
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
+	REST_GPR(1, r1)
+	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 
-1:	mtspr	SPRN_SRR1,r3
-
-	ld	r2,_CCR(r1)
-	mtcrf	0xFF,r2
-	ld	r2,_NIP(r1)
-	mtspr	SPRN_SRR0,r2
+1:	mtcr	r3
+	mtlr	r4
+	mtctr	r5
+	mtspr	SPRN_XER,r6
 
 	/*
 	 * Leaving a stale exception_marker on the stack can confuse
 	 * the reliable stack unwinder later on. Clear it.
 	 */
-	li	r2,0
-	std	r2,STACK_FRAME_OVERHEAD-16(r1)
+	std	r0,STACK_FRAME_OVERHEAD-16(r1)
 
-	ld	r0,GPR0(r1)
-	ld	r2,GPR2(r1)
-	ld	r3,GPR3(r1)
+	REST_4GPRS(2, r1)
+	REST_GPR(6, r1)
+	REST_GPR(0, r1)
 
-	kuap_restore_amr r4
+	/* Nasty emulate stack store case. */
+	std	r9,PACA_EXGEN+0(r13)
+	addi	r9,r1,INT_FRAME_SIZE /* get original r1 */
+	REST_GPR(1, r1)
+	std	r9,0(r1)
+	ld	r9,PACA_EXGEN+0(r13)
 
-	ld	r4,GPR4(r1)
-	ld	r1,GPR1(r1)
 	RFI_TO_KERNEL
 	b	.	/* prevent speculative execution */
 
-#endif /* CONFIG_PPC_BOOK3E */
-
-	/*
-	 * We are returning to a context with interrupts soft disabled.
-	 *
-	 * However, we may also about to hard enable, so we need to
-	 * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
-	 * or that bit can get out of sync and bad things will happen
-	 */
-.Lrestore_irq_off:
-	ld	r3,_MSR(r1)
-	lbz	r7,PACAIRQHAPPENED(r13)
-	andi.	r0,r3,MSR_EE
-	beq	1f
-	rlwinm	r7,r7,0,~PACA_IRQ_HARD_DIS
-	stb	r7,PACAIRQHAPPENED(r13)
-1:
-#if defined(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && defined(CONFIG_BUG)
-	/* The interrupt should not have soft enabled. */
-	lbz	r7,PACAIRQSOFTMASK(r13)
-1:	tdeqi	r7,IRQS_ENABLED
-	EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-	b	.Ldo_restore
-
-	/*
-	 * Something did happen, check if a re-emit is needed
-	 * (this also clears paca->irq_happened)
-	 */
-.Lrestore_check_irq_replay:
-	/* XXX: We could implement a fast path here where we check
-	 * for irq_happened being just 0x01, in which case we can
-	 * clear it and return. That means that we would potentially
-	 * miss a decrementer having wrapped all the way around.
-	 *
-	 * Still, this might be useful for things like hash_page
-	 */
-	bl	__check_irq_replay
-	cmpwi	cr0,r3,0
-	beq	.Lrestore_no_replay
- 
-	/*
-	 * We need to re-emit an interrupt. We do so by re-using our
-	 * existing exception frame. We first change the trap value,
-	 * but we need to ensure we preserve the low nibble of it
-	 */
-	ld	r4,_TRAP(r1)
-	clrldi	r4,r4,60
-	or	r4,r4,r3
-	std	r4,_TRAP(r1)
-
-	/*
-	 * PACA_IRQ_HARD_DIS won't always be set here, so set it now
-	 * to reconcile the IRQ state. Tracing is already accounted for.
-	 */
-	lbz	r4,PACAIRQHAPPENED(r13)
-	ori	r4,r4,PACA_IRQ_HARD_DIS
-	stb	r4,PACAIRQHAPPENED(r13)
-
-	/*
-	 * Then find the right handler and call it. Interrupts are
-	 * still soft-disabled and we keep them that way.
-	*/
-	cmpwi	cr0,r3,0x500
-	bne	1f
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
- 	bl	do_IRQ
-	b	ret_from_except
-1:	cmpwi	cr0,r3,0xf00
-	bne	1f
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	bl	performance_monitor_exception
-	b	ret_from_except
-1:	cmpwi	cr0,r3,0xe60
-	bne	1f
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	bl	handle_hmi_exception
-	b	ret_from_except
-1:	cmpwi	cr0,r3,0x900
-	bne	1f
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	bl	timer_interrupt
-	b	ret_from_except
-#ifdef CONFIG_PPC_DOORBELL
-1:
-#ifdef CONFIG_PPC_BOOK3E
-	cmpwi	cr0,r3,0x280
-#else
-	cmpwi	cr0,r3,0xa00
-#endif /* CONFIG_PPC_BOOK3E */
-	bne	1f
-	addi	r3,r1,STACK_FRAME_OVERHEAD;
-	bl	doorbell_exception
-#endif /* CONFIG_PPC_DOORBELL */
-1:	b	ret_from_except /* What else to do here ? */
- 
-.Lunrecov_restore:
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	unrecoverable_exception
-	b	.Lunrecov_restore
-
-_ASM_NOKPROBE_SYMBOL(ret_from_except);
-_ASM_NOKPROBE_SYMBOL(ret_from_except_lite);
-_ASM_NOKPROBE_SYMBOL(resume_kernel);
-_ASM_NOKPROBE_SYMBOL(fast_exc_return_irq);
-_ASM_NOKPROBE_SYMBOL(restore);
-_ASM_NOKPROBE_SYMBOL(fast_exception_return);
-
-
 #ifdef CONFIG_PPC_RTAS
 /*
  * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 768f133de4f1..51223299d22d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -696,7 +696,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 	RECONCILE_IRQ_STATE(r10, r11);					\
 	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
 	bl	hdlr;							\
-	b	ret_from_except
+	b	interrupt_return	
 
 /*
  * Like EXC_COMMON, but for exceptions that can occur in the idle task and
@@ -706,11 +706,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 	EXC_COMMON_BEGIN(name);						\
 	EXCEPTION_COMMON(PACA_EXGEN, realvec);				\
 	FINISH_NAP;							\
+	bl	save_nvgprs;						\
 	RECONCILE_IRQ_STATE(r10, r11);					\
 	RUNLATCH_ON;							\
 	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
 	bl	hdlr;							\
-	b	ret_from_except_lite
+	b	interrupt_return	
 
 
 /*
@@ -1058,7 +1059,7 @@ EXC_COMMON_BEGIN(machine_check_common)
 	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	machine_check_exception
-	b	ret_from_except
+	b	interrupt_return
 
 #define MACHINE_CHECK_HANDLER_WINDUP			\
 	/* Clear MSR_RI before setting SRR0 and SRR1. */\
@@ -1301,7 +1302,7 @@ BEGIN_MMU_FTR_SECTION
 	bl	do_slb_fault
 	cmpdi	r3,0
 	bne-	1f
-	b	fast_exception_return
+	b	fast_interrupt_return
 1:	/* Error case */
 MMU_FTR_SECTION_ELSE
 	/* Radix case, access is outside page table range */
@@ -1314,7 +1315,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	ld	r5,RESULT(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_bad_slb_fault
-	b	ret_from_except
+	b	interrupt_return
 
 
 EXC_REAL(instruction_access, 0x400, 0x80)
@@ -1350,7 +1351,7 @@ BEGIN_MMU_FTR_SECTION
 	bl	do_slb_fault
 	cmpdi	r3,0
 	bne-	1f
-	b	fast_exception_return
+	b	fast_interrupt_return
 1:	/* Error case */
 MMU_FTR_SECTION_ELSE
 	/* Radix case, access is outside page table range */
@@ -1363,7 +1364,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	ld	r5,RESULT(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_bad_slb_fault
-	b	ret_from_except
+	b	interrupt_return
 
 
 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1416,7 +1417,7 @@ EXC_COMMON_BEGIN(alignment_common)
 	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	alignment_exception
-	b	ret_from_except
+	b	interrupt_return
 
 
 EXC_REAL(program_check, 0x700, 0x100)
@@ -1454,7 +1455,7 @@ EXC_COMMON_BEGIN(program_check_common)
 	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	program_check_exception
-	b	ret_from_except
+	b	interrupt_return
 
 
 EXC_REAL(fp_unavailable, 0x800, 0x100)
@@ -1479,14 +1480,14 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 	bl	load_up_fpu
-	b	fast_exception_return
+	b	fast_interrupt_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
 	bl	save_nvgprs
 	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	fp_unavailable_tm
-	b	ret_from_except
+	b	interrupt_return
 #endif
 
 
@@ -1676,7 +1677,7 @@ BEGIN_MMU_FTR_SECTION
 MMU_FTR_SECTION_ELSE
 	bl      unknown_exception
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
-	b       ret_from_except
+	b       interrupt_return
 
 
 EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20)
@@ -1744,7 +1745,7 @@ EXC_COMMON_BEGIN(hmi_exception_common)
 	RUNLATCH_ON
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	handle_hmi_exception
-	b	ret_from_except
+	b	interrupt_return
 
 EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
 EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
@@ -1792,14 +1793,14 @@ BEGIN_FTR_SECTION
   END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
 #endif
 	bl	load_up_altivec
-	b	fast_exception_return
+	b	fast_interrupt_return
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
 	bl	save_nvgprs
 	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	altivec_unavailable_tm
-	b	ret_from_except
+	b	interrupt_return
 #endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
@@ -1808,7 +1809,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	altivec_unavailable_exception
-	b	ret_from_except
+	b	interrupt_return
 
 
 EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20)
@@ -1835,7 +1836,7 @@ BEGIN_FTR_SECTION
 	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	vsx_unavailable_tm
-	b	ret_from_except
+	b	interrupt_return
 #endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
@@ -1844,7 +1845,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	vsx_unavailable_exception
-	b	ret_from_except
+	b	interrupt_return
 
 
 EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20)
@@ -2046,7 +2047,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
 	RECONCILE_IRQ_STATE(r10, r11)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	soft_nmi_interrupt
-	b	ret_from_except
+	b	interrupt_return
 
 #else /* CONFIG_PPC_WATCHDOG */
 #define MASKED_DEC_HANDLER_LABEL 2f /* normal return */
@@ -2329,7 +2330,7 @@ do_hash_page:
         cmpdi	r3,0			/* see if __hash_page succeeded */
 
 	/* Success */
-	beq	fast_exc_return_irq	/* Return from exception on success */
+	beq	interrupt_return_lite	/* Return from exception on success */
 
 	/* Error */
 	blt-	13f
@@ -2344,16 +2345,16 @@ handle_page_fault:
 	bne-    handle_dabr_fault
 	ld	r4,_DAR(r1)
 	ld	r5,_DSISR(r1)
+	bl	save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_page_fault
 	cmpdi	r3,0
-	beq+	ret_from_except_lite
-	bl	save_nvgprs
+	beq+	interrupt_return_lite
 	mr	r5,r3
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	lwz	r4,_DAR(r1)
 	bl	bad_page_fault
-	b	ret_from_except
+	b	interrupt_return
 
 /* We have a data breakpoint exception - handle it */
 handle_dabr_fault:
@@ -2365,9 +2366,9 @@ handle_dabr_fault:
 	/*
 	 * do_break() may have changed the NV GPRS while handling a breakpoint.
 	 * If so, we need to restore them with their updated values. Don't use
-	 * ret_from_except_lite here.
+	 * interrupt_return_lite here.
 	 */
-	b       ret_from_except
+	b       interrupt_return
 
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -2379,7 +2380,7 @@ handle_dabr_fault:
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r4,_DAR(r1)
 	bl	low_hash_fault
-	b	ret_from_except
+	b	interrupt_return
 #endif
 
 /*
@@ -2394,7 +2395,7 @@ handle_dabr_fault:
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	li	r5,SIGSEGV
 	bl	bad_page_fault
-	b	ret_from_except
+	b	interrupt_return
 
 /*
  * When doorbell is triggered from system reset wakeup, the message is
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index d42519b86ddd..1e7c3e47a8b8 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -161,13 +161,17 @@ unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs)
 		__mtmsrd(MSR_RI, 1);
 		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 		local_irq_enable();
-		/* Took an interrupt which may have more exit work to do. */
+		/* Took an interrupt, may have more exit work to do. */
 		goto again;
 	}
 	trace_hardirqs_on();
 	local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
 	irq_soft_mask_set(IRQS_ENABLED);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	local_paca->tm_scratch = regs->msr;
+#endif
+
 	kuap_check_amr();
 
 	account_cpu_user_exit();
@@ -175,3 +179,144 @@ unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs)
 	return ret;
 }
 
+unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
+{
+#ifdef CONFIG_PPC_BOOK3E
+	struct thread_struct *ts = &current->thread;
+#endif
+	unsigned long *ti_flagsp = &current_thread_info()->flags;
+	unsigned long ti_flags;
+	unsigned long flags;
+	unsigned long ret = 0;
+
+	BUG_ON(!FULL_REGS(regs));
+	BUG_ON(!(regs->msr & MSR_RI));
+	BUG_ON(regs->softe != IRQS_ENABLED);
+
+	local_irq_save(flags);
+
+again:
+	ti_flags = READ_ONCE(*ti_flagsp);
+	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+		local_irq_enable(); /* returning to user: may enable */
+		if (ti_flags & _TIF_NEED_RESCHED) {
+			schedule();
+		} else {
+			if (ti_flags & _TIF_SIGPENDING)
+				ret |= _TIF_RESTOREALL;
+			do_notify_resume(regs, ti_flags);
+		}
+		local_irq_disable();
+		ti_flags = READ_ONCE(*ti_flagsp);
+	}
+
+#ifdef CONFIG_PPC_BOOK3S
+	if (IS_ENABLED(CONFIG_PPC_FPU)) {
+		unsigned long mathflags = MSR_FP;
+
+		if (IS_ENABLED(CONFIG_ALTIVEC))
+			mathflags |= MSR_VEC;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+		if (ti_flags & _TIF_RESTORE_TM)
+			restore_tm_state(regs);
+		else
+#endif
+		if ((regs->msr & mathflags) != mathflags)
+			restore_math(regs);
+	}
+#endif
+
+	__mtmsrd(0, 1);	/* Disable MSR_EE and MSR_RI */
+	if (unlikely(lazy_irq_pending())) {
+		__mtmsrd(MSR_RI, 1);
+		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+		local_irq_enable();
+		local_irq_disable();
+		/* Took an interrupt, may have more exit work to do. */
+		goto again;
+	}
+	trace_hardirqs_on();
+	local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+	irq_soft_mask_set(IRQS_ENABLED);
+
+#ifdef CONFIG_PPC_BOOK3E
+	if (unlikely(ts->debug.debug.dbcr0 & DBCR0_IDM)) {
+		/*
+		 * Check to see if the dbcr0 register is set up to debug.
+		 * Use the internal debug mode bit to do this.
+		 */
+		mtmsr(mfmsr() & ~MSR_DE);
+		mtspr(SPRN_DBCR0, ts->debug.debug.dbcr0);
+		mtspr(SPRN_DBSR, -1);
+	}
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	local_paca->tm_scratch = regs->msr;
+#endif
+
+	kuap_check_amr();
+
+	account_cpu_user_exit();
+
+	return ret;
+}
+
+void unrecoverable_exception(struct pt_regs *regs);
+void preempt_schedule_irq(void);
+
+
+unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
+{
+	unsigned long *ti_flagsp = &current_thread_info()->flags;
+	unsigned long flags;
+
+	BUG_ON(!FULL_REGS(regs));
+	local_irq_save(flags);
+
+	if (unlikely(!(regs->msr & MSR_RI)))
+		unrecoverable_exception(regs);
+
+again:
+	if (IS_ENABLED(CONFIG_PREEMPT)) {
+		/* Return to preemptible kernel context */
+		if (unlikely(*ti_flagsp & _TIF_NEED_RESCHED)) {
+			if (preempt_count() == 0 && regs->softe == IRQS_ENABLED)
+				preempt_schedule_irq();
+		}
+	}
+
+	__mtmsrd(0, 1);	/* Disable MSR_EE and MSR_RI */
+	if (regs->softe == IRQS_ENABLED) {
+		if (unlikely(lazy_irq_pending())) {
+			__mtmsrd(MSR_RI, 1);
+			local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+			local_irq_enable();
+			local_irq_disable();
+			/* Took an interrupt, may have more exit work to do. */
+			goto again;
+		}
+		trace_hardirqs_on();
+		irq_soft_mask_set(IRQS_ENABLED);
+	}
+	if (regs->msr & MSR_EE)
+		local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	local_paca->tm_scratch = regs->msr;
+#endif
+
+	/*
+	 * We don't need to restore AMR on the way back to userspace for KUAP.
+	 * The value of AMR only matters while we're in the kernel.
+	 */
+	kuap_restore_amr(regs);
+
+	if (unlikely(*ti_flagsp & _TIF_EMULATE_STACK_STORE)) {
+		clear_bits(_TIF_EMULATE_STACK_STORE, ti_flagsp);
+		return 1;
+	}
+	return 0;
+}
+
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 8eb867dbad5f..44e7a776e56f 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -131,7 +131,7 @@ _GLOBAL(load_up_vsx)
 	/* enable use of VSX after return */
 	oris	r12,r12,MSR_VSX@h
 	std	r12,_MSR(r1)
-	b	fast_exception_return
+	b	fast_interrupt_return
 
 #endif /* CONFIG_VSX */
 
-- 
2.22.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] powerpc/64: interrupt return in C
  2019-08-28  9:06 [PATCH] powerpc/64: interrupt return in C Nicholas Piggin
@ 2019-08-28  9:28 ` Christophe Leroy
  2019-08-28 11:56   ` Nicholas Piggin
  2019-09-08 10:50 ` Nicholas Piggin
  1 sibling, 1 reply; 4+ messages in thread
From: Christophe Leroy @ 2019-08-28  9:28 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev



Le 28/08/2019 à 11:06, Nicholas Piggin a écrit :
> This is a work in progress that goes on top of the syscalls in C patch.
> It's not quite complete, 64e low level exit is not taken care of, and
> the new return is hacked into the existing interrupt handlers pretty
> quickly (e.g., full gprs handling is still ugly and could be cleaned),
> but that code touches exception-64s.S which is under heavy modification
> in parallel so I will rebase on top of that before polishing it properly.
> I will also try to convert to more IS_ENABLED() for Christophe.

I am flattered.

I guess at the end you are not doing it for me but because it is on 
purpose in your code improvement process.

See 
https://www.kernel.org/doc/html/latest/process/coding-style.html#conditional-compilation

Christophe

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] powerpc/64: interrupt return in C
  2019-08-28  9:28 ` Christophe Leroy
@ 2019-08-28 11:56   ` Nicholas Piggin
  0 siblings, 0 replies; 4+ messages in thread
From: Nicholas Piggin @ 2019-08-28 11:56 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev

Christophe Leroy's on August 28, 2019 7:28 pm:
> 
> 
> Le 28/08/2019 à 11:06, Nicholas Piggin a écrit :
>> This is a work in progress that goes on top of the syscalls in C patch.
>> It's not quite complete, 64e low level exit is not taken care of, and
>> the new return is hacked into the existing interrupt handlers pretty
>> quickly (e.g., full gprs handling is still ugly and could be cleaned),
>> but that code touches exception-64s.S which is under heavy modification
>> in parallel so I will rebase on top of that before polishing it properly.
>> I will also try to convert to more IS_ENABLED() for Christophe.
> 
> I am flattered.
> 
> I guess at the end you are not doing it for me but because it is on 
> purpose in your code improvement process.

Heh :) Yeah I agree it's almost always nicer code. Sometimes I just
forget about it so I appreciate you pointing out improvements.

Thanks,
Nick


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] powerpc/64: interrupt return in C
  2019-08-28  9:06 [PATCH] powerpc/64: interrupt return in C Nicholas Piggin
  2019-08-28  9:28 ` Christophe Leroy
@ 2019-09-08 10:50 ` Nicholas Piggin
  1 sibling, 0 replies; 4+ messages in thread
From: Nicholas Piggin @ 2019-09-08 10:50 UTC (permalink / raw)
  To: linuxppc-dev

Nicholas Piggin's on August 28, 2019 7:06 pm:
> This is a work in progress that goes on top of the syscalls in C patch.
> It's not quite complete, 64e low level exit is not taken care of, and
> the new return is hacked into the existing interrupt handlers pretty
> quickly (e.g., full gprs handling is still ugly and could be cleaned),

In performance testing on a POWER9, page fault latency is about 1.1%
faster with interrupt returns in C which always saves NVGPRs. So I
think the plan will be to rip out all the NVGPR save optimisation from
the 64-bit code.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-28  9:06 [PATCH] powerpc/64: interrupt return in C Nicholas Piggin
2019-08-28  9:28 ` Christophe Leroy
2019-08-28 11:56   ` Nicholas Piggin
2019-09-08 10:50 ` Nicholas Piggin

LinuxPPC-Dev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linuxppc-dev/0 linuxppc-dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linuxppc-dev linuxppc-dev/ https://lore.kernel.org/linuxppc-dev \
		linuxppc-dev@lists.ozlabs.org linuxppc-dev@ozlabs.org linuxppc-dev@archiver.kernel.org
	public-inbox-index linuxppc-dev


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.ozlabs.lists.linuxppc-dev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox