linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5] powerpc/irq: inline call_do_irq() and call_do_softirq() on PPC32
@ 2019-12-07 17:20 Christophe Leroy
  2019-12-12 12:52 ` Christoph Hellwig
  2020-06-01  7:26 ` Christophe Leroy
  0 siblings, 2 replies; 4+ messages in thread
From: Christophe Leroy @ 2019-12-07 17:20 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

call_do_irq() and call_do_softirq() are simple enough to be
worth inlining.

Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.
It also allows GCC to keep the saved ksp_limit in an nonvolatile reg.

This is inspired from S390 arch. Several other arches do more or
less the same. The way sparc arch does seems odd thought.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>

---
v2: no change.
v3: no change.
v4:
- comment reminding the purpose of the inline asm block.
- added r2 as clobbered reg
v5:
- Limiting the change to PPC32 for now.
- removed r2 from the clobbered regs list (on PPC32 r2 points to current all the time)
- Removed patch 1 and merged ksp_limit handling in here.
---
 arch/powerpc/include/asm/irq.h |  2 ++
 arch/powerpc/kernel/irq.c      | 48 ++++++++++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/misc_32.S  | 39 ----------------------------------
 3 files changed, 50 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 814dfab7e392..e4a92f0b4ad4 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -56,8 +56,10 @@ extern void *mcheckirq_ctx[NR_CPUS];
 extern void *hardirq_ctx[NR_CPUS];
 extern void *softirq_ctx[NR_CPUS];
 
+#ifdef CONFIG_PPC64
 void call_do_softirq(void *sp);
 void call_do_irq(struct pt_regs *regs, void *sp);
+#endif
 extern void do_IRQ(struct pt_regs *regs);
 extern void __init init_IRQ(void);
 extern void __do_irq(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5645bc9cbc09..240eca12c71d 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -611,6 +611,54 @@ static inline void check_stack_overflow(void)
 #endif
 }
 
+#ifdef CONFIG_PPC32
+static inline void call_do_softirq(const void *sp)
+{
+	register unsigned long ret asm("r3");
+	unsigned long limit = current->thread.ksp_limit;
+
+	/* Adjust the stack limit */
+	current->thread.ksp_limit = (unsigned long)sp;
+
+	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
+	asm volatile(
+		"	"PPC_STLU"	1, %2(%1);\n"
+		"	mr		1, %1;\n"
+		"	bl		%3;\n"
+		"	"PPC_LL"	1, 0(1);\n" :
+		"=r"(ret) :
+		"b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_softirq) :
+		"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
+		"r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+
+	/* Restore stack limit */
+	current->thread.ksp_limit = limit;
+}
+
+static inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+	register unsigned long r3 asm("r3") = (unsigned long)regs;
+	unsigned long limit = current->thread.ksp_limit;
+
+	/* Adjust the stack limit */
+	current->thread.ksp_limit = (unsigned long)sp;
+
+	/* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
+	asm volatile(
+		"	"PPC_STLU"	1, %2(%1);\n"
+		"	mr		1, %1;\n"
+		"	bl		%3;\n"
+		"	"PPC_LL"	1, 0(1);\n" :
+		"+r"(r3) :
+		"b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
+		"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
+		"r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+
+	/* Restore stack limit */
+	current->thread.ksp_limit = limit;
+}
+#endif
+
 void __do_irq(struct pt_regs *regs)
 {
 	unsigned int irq;
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index d80212be8698..341a3cd199cb 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -28,45 +28,6 @@
 	.text
 
 /*
- * We store the saved ksp_limit in the unused part
- * of the STACK_FRAME_OVERHEAD
- */
-_GLOBAL(call_do_softirq)
-	mflr	r0
-	stw	r0,4(r1)
-	lwz	r10,THREAD+KSP_LIMIT(r2)
-	stw	r3, THREAD+KSP_LIMIT(r2)
-	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
-	mr	r1,r3
-	stw	r10,8(r1)
-	bl	__do_softirq
-	lwz	r10,8(r1)
-	lwz	r1,0(r1)
-	lwz	r0,4(r1)
-	stw	r10,THREAD+KSP_LIMIT(r2)
-	mtlr	r0
-	blr
-
-/*
- * void call_do_irq(struct pt_regs *regs, void *sp);
- */
-_GLOBAL(call_do_irq)
-	mflr	r0
-	stw	r0,4(r1)
-	lwz	r10,THREAD+KSP_LIMIT(r2)
-	stw	r4, THREAD+KSP_LIMIT(r2)
-	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
-	mr	r1,r4
-	stw	r10,8(r1)
-	bl	__do_irq
-	lwz	r10,8(r1)
-	lwz	r1,0(r1)
-	lwz	r0,4(r1)
-	stw	r10,THREAD+KSP_LIMIT(r2)
-	mtlr	r0
-	blr
-
-/*
  * This returns the high 64 bits of the product of two 64-bit numbers.
  */
 _GLOBAL(mulhdu)
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] powerpc/irq: inline call_do_irq() and call_do_softirq() on PPC32
  2019-12-07 17:20 [PATCH v5] powerpc/irq: inline call_do_irq() and call_do_softirq() on PPC32 Christophe Leroy
@ 2019-12-12 12:52 ` Christoph Hellwig
  2019-12-12 16:34   ` Christophe Leroy
  2020-06-01  7:26 ` Christophe Leroy
  1 sibling, 1 reply; 4+ messages in thread
From: Christoph Hellwig @ 2019-12-12 12:52 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	linuxppc-dev, linux-kernel

On Sat, Dec 07, 2019 at 05:20:04PM +0000, Christophe Leroy wrote:
> call_do_irq() and call_do_softirq() are simple enough to be
> worth inlining.
> 
> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.
> It also allows GCC to keep the saved ksp_limit in an nonvolatile reg.
> 
> This is inspired from S390 arch. Several other arches do more or
> less the same. The way sparc arch does seems odd thought.

Any reason you only do this for 32-bit and not 64-bit as well?

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] powerpc/irq: inline call_do_irq() and call_do_softirq() on PPC32
  2019-12-12 12:52 ` Christoph Hellwig
@ 2019-12-12 16:34   ` Christophe Leroy
  0 siblings, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2019-12-12 16:34 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	linuxppc-dev, linux-kernel



Le 12/12/2019 à 13:52, Christoph Hellwig a écrit :
> On Sat, Dec 07, 2019 at 05:20:04PM +0000, Christophe Leroy wrote:
>> call_do_irq() and call_do_softirq() are simple enough to be
>> worth inlining.
>>
>> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.
>> It also allows GCC to keep the saved ksp_limit in an nonvolatile reg.
>>
>> This is inspired from S390 arch. Several other arches do more or
>> less the same. The way sparc arch does seems odd thought.
> 
> Any reason you only do this for 32-bit and not 64-bit as well?
> 

Yes ... There has been a long discussion on this in v4, see 
https://patchwork.ozlabs.org/patch/1174288/

The problem is that on PPC64, r2 register is used as TOC pointer and it 
is apparently not straithforward to make sure the caller and the callee 
are using the same TOC.

On PPC32 it's more simple, r2 is current task_struct at all time, it 
never changes.

Christophe

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v5] powerpc/irq: inline call_do_irq() and call_do_softirq() on PPC32
  2019-12-07 17:20 [PATCH v5] powerpc/irq: inline call_do_irq() and call_do_softirq() on PPC32 Christophe Leroy
  2019-12-12 12:52 ` Christoph Hellwig
@ 2020-06-01  7:26 ` Christophe Leroy
  1 sibling, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2020-06-01  7:26 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Benjamin Herrenschmidt, Paul Mackerras, linuxppc-dev, linux-kernel

Hi Michael,

Le 07/12/2019 à 18:20, Christophe Leroy a écrit :
> call_do_irq() and call_do_softirq() are simple enough to be
> worth inlining.
> 
> Inlining them avoids an mflr/mtlr pair plus a save/reload on stack.
> It also allows GCC to keep the saved ksp_limit in an nonvolatile reg.
> 
> This is inspired from S390 arch. Several other arches do more or
> less the same. The way sparc arch does seems odd thought.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>

Anything more I need to do for this patch to get merged ?

Thanks
Christophe


> 
> ---
> v2: no change.
> v3: no change.
> v4:
> - comment reminding the purpose of the inline asm block.
> - added r2 as clobbered reg
> v5:
> - Limiting the change to PPC32 for now.
> - removed r2 from the clobbered regs list (on PPC32 r2 points to current all the time)
> - Removed patch 1 and merged ksp_limit handling in here.
> ---
>   arch/powerpc/include/asm/irq.h |  2 ++
>   arch/powerpc/kernel/irq.c      | 48 ++++++++++++++++++++++++++++++++++++++++++
>   arch/powerpc/kernel/misc_32.S  | 39 ----------------------------------
>   3 files changed, 50 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
> index 814dfab7e392..e4a92f0b4ad4 100644
> --- a/arch/powerpc/include/asm/irq.h
> +++ b/arch/powerpc/include/asm/irq.h
> @@ -56,8 +56,10 @@ extern void *mcheckirq_ctx[NR_CPUS];
>   extern void *hardirq_ctx[NR_CPUS];
>   extern void *softirq_ctx[NR_CPUS];
>   
> +#ifdef CONFIG_PPC64
>   void call_do_softirq(void *sp);
>   void call_do_irq(struct pt_regs *regs, void *sp);
> +#endif
>   extern void do_IRQ(struct pt_regs *regs);
>   extern void __init init_IRQ(void);
>   extern void __do_irq(struct pt_regs *regs);
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index 5645bc9cbc09..240eca12c71d 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -611,6 +611,54 @@ static inline void check_stack_overflow(void)
>   #endif
>   }
>   
> +#ifdef CONFIG_PPC32
> +static inline void call_do_softirq(const void *sp)
> +{
> +	register unsigned long ret asm("r3");
> +	unsigned long limit = current->thread.ksp_limit;
> +
> +	/* Adjust the stack limit */
> +	current->thread.ksp_limit = (unsigned long)sp;
> +
> +	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
> +	asm volatile(
> +		"	"PPC_STLU"	1, %2(%1);\n"
> +		"	mr		1, %1;\n"
> +		"	bl		%3;\n"
> +		"	"PPC_LL"	1, 0(1);\n" :
> +		"=r"(ret) :
> +		"b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_softirq) :
> +		"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
> +		"r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
> +
> +	/* Restore stack limit */
> +	current->thread.ksp_limit = limit;
> +}
> +
> +static inline void call_do_irq(struct pt_regs *regs, void *sp)
> +{
> +	register unsigned long r3 asm("r3") = (unsigned long)regs;
> +	unsigned long limit = current->thread.ksp_limit;
> +
> +	/* Adjust the stack limit */
> +	current->thread.ksp_limit = (unsigned long)sp;
> +
> +	/* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
> +	asm volatile(
> +		"	"PPC_STLU"	1, %2(%1);\n"
> +		"	mr		1, %1;\n"
> +		"	bl		%3;\n"
> +		"	"PPC_LL"	1, 0(1);\n" :
> +		"+r"(r3) :
> +		"b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
> +		"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
> +		"r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
> +
> +	/* Restore stack limit */
> +	current->thread.ksp_limit = limit;
> +}
> +#endif
> +
>   void __do_irq(struct pt_regs *regs)
>   {
>   	unsigned int irq;
> diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
> index d80212be8698..341a3cd199cb 100644
> --- a/arch/powerpc/kernel/misc_32.S
> +++ b/arch/powerpc/kernel/misc_32.S
> @@ -28,45 +28,6 @@
>   	.text
>   
>   /*
> - * We store the saved ksp_limit in the unused part
> - * of the STACK_FRAME_OVERHEAD
> - */
> -_GLOBAL(call_do_softirq)
> -	mflr	r0
> -	stw	r0,4(r1)
> -	lwz	r10,THREAD+KSP_LIMIT(r2)
> -	stw	r3, THREAD+KSP_LIMIT(r2)
> -	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
> -	mr	r1,r3
> -	stw	r10,8(r1)
> -	bl	__do_softirq
> -	lwz	r10,8(r1)
> -	lwz	r1,0(r1)
> -	lwz	r0,4(r1)
> -	stw	r10,THREAD+KSP_LIMIT(r2)
> -	mtlr	r0
> -	blr
> -
> -/*
> - * void call_do_irq(struct pt_regs *regs, void *sp);
> - */
> -_GLOBAL(call_do_irq)
> -	mflr	r0
> -	stw	r0,4(r1)
> -	lwz	r10,THREAD+KSP_LIMIT(r2)
> -	stw	r4, THREAD+KSP_LIMIT(r2)
> -	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
> -	mr	r1,r4
> -	stw	r10,8(r1)
> -	bl	__do_irq
> -	lwz	r10,8(r1)
> -	lwz	r1,0(r1)
> -	lwz	r0,4(r1)
> -	stw	r10,THREAD+KSP_LIMIT(r2)
> -	mtlr	r0
> -	blr
> -
> -/*
>    * This returns the high 64 bits of the product of two 64-bit numbers.
>    */
>   _GLOBAL(mulhdu)
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-06-01  7:26 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-12-07 17:20 [PATCH v5] powerpc/irq: inline call_do_irq() and call_do_softirq() on PPC32 Christophe Leroy
2019-12-12 12:52 ` Christoph Hellwig
2019-12-12 16:34   ` Christophe Leroy
2020-06-01  7:26 ` Christophe Leroy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).