linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH v1] power: don't manage floating point regs when no FPU
@ 2020-08-07 16:13 Christophe Leroy
  2020-08-11 12:07 ` Michael Ellerman
  0 siblings, 1 reply; 4+ messages in thread
From: Christophe Leroy @ 2020-08-07 16:13 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman
  Cc: linux-kernel, linuxppc-dev

There is no point in copying floating point regs when there
is no FPU and MATH_EMULATION is not selected.

Create a new CONFIG_PPC_FPU_REGS bool that is selected by
CONFIG_MATH_EMULATION and CONFIG_PPC_FPU, and use it to
opt out everything related to fp_state in thread_struct.

The following app runs in approx 10.50 seconds on an 8xx without
the patch, and in 9.45 seconds with the patch.

	void sigusr1(int sig) { }

	int main(int argc, char **argv)
	{
		int i = 100000;

		signal(SIGUSR1, sigusr1);
		for (;i--;)
			raise(SIGUSR1);
		exit(0);
	}

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/Kconfig                      |  1 +
 arch/powerpc/include/asm/processor.h      |  2 ++
 arch/powerpc/kernel/asm-offsets.c         |  2 ++
 arch/powerpc/kernel/process.c             |  4 ++++
 arch/powerpc/kernel/ptrace/ptrace-novsx.c |  8 ++++++++
 arch/powerpc/kernel/ptrace/ptrace.c       |  4 ++++
 arch/powerpc/kernel/signal.c              | 12 +++++++++++-
 arch/powerpc/kernel/signal_32.c           |  4 ++++
 arch/powerpc/kernel/traps.c               |  4 ++++
 arch/powerpc/platforms/Kconfig.cputype    |  4 ++++
 10 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1f48bbfb3ce9..a2611880b904 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -416,6 +416,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE
 config MATH_EMULATION
 	bool "Math emulation"
 	depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE
+	select PPC_FPU_REGS
 	help
 	  Some PowerPC chips designed for embedded applications do not have
 	  a floating-point unit and therefore do not implement the
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index ed0d633ab5aa..e20b0c5abe62 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -175,8 +175,10 @@ struct thread_struct {
 #endif
 	/* Debug Registers */
 	struct debug_reg debug;
+#ifdef CONFIG_PPC_FPU_REGS
 	struct thread_fp_state	fp_state;
 	struct thread_fp_state	*fp_save_area;
+#endif
 	int		fpexc_mode;	/* floating-point exception mode */
 	unsigned int	align_ctl;	/* alignment handling control */
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8711c2164b45..6cb36c341c70 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -110,9 +110,11 @@ int main(void)
 #ifdef CONFIG_BOOKE
 	OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]);
 #endif
+#ifdef CONFIG_PPC_FPU
 	OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode);
 	OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr);
 	OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area);
+#endif
 	OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr);
 	OFFSET(THREAD_LOAD_FP, thread_struct, load_fp);
 #ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 016bd831908e..7e0082ac0a39 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1694,7 +1694,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 		p->thread.ptrace_bps[i] = NULL;
 #endif
 
+#ifdef CONFIG_PPC_FPU_REGS
 	p->thread.fp_save_area = NULL;
+#endif
 #ifdef CONFIG_ALTIVEC
 	p->thread.vr_save_area = NULL;
 #endif
@@ -1821,8 +1823,10 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 #endif
 	current->thread.load_slb = 0;
 	current->thread.load_fp = 0;
+#ifdef CONFIG_PPC_FPU_REGS
 	memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
 	current->thread.fp_save_area = NULL;
+#endif
 #ifdef CONFIG_ALTIVEC
 	memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
 	current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
index b2dc4e92d11a..8f87a11f3f8c 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-novsx.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
@@ -21,6 +21,7 @@
 int fpr_get(struct task_struct *target, const struct user_regset *regset,
 	    unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf)
 {
+#ifdef CONFIG_PPC_FPU_REGS
 	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
 		     offsetof(struct thread_fp_state, fpr[32]));
 
@@ -28,6 +29,9 @@ int fpr_get(struct task_struct *target, const struct user_regset *regset,
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
 				   &target->thread.fp_state, 0, -1);
+#else
+	return 0;
+#endif
 }
 
 /*
@@ -47,6 +51,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset,
 	    unsigned int pos, unsigned int count,
 	    const void *kbuf, const void __user *ubuf)
 {
+#ifdef CONFIG_PPC_FPU_REGS
 	BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
 		     offsetof(struct thread_fp_state, fpr[32]));
 
@@ -54,4 +59,7 @@ int fpr_set(struct task_struct *target, const struct user_regset *regset,
 
 	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
 				  &target->thread.fp_state, 0, -1);
+#else
+	return 0;
+#endif
 }
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
index f6e51be47c6e..f162bfec0d3f 100644
--- a/arch/powerpc/kernel/ptrace/ptrace.c
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -70,6 +70,7 @@ long arch_ptrace(struct task_struct *child, long request,
 			ret = ptrace_get_reg(child, (int) index, &tmp);
 			if (ret)
 				break;
+#ifdef CONFIG_PPC_FPU_REGS
 		} else {
 			unsigned int fpidx = index - PT_FPR0;
 
@@ -79,6 +80,7 @@ long arch_ptrace(struct task_struct *child, long request,
 				       sizeof(long));
 			else
 				tmp = child->thread.fp_state.fpscr;
+#endif
 		}
 		ret = put_user(tmp, datalp);
 		break;
@@ -103,6 +105,7 @@ long arch_ptrace(struct task_struct *child, long request,
 		CHECK_FULL_REGS(child->thread.regs);
 		if (index < PT_FPR0) {
 			ret = ptrace_put_reg(child, index, data);
+#ifdef CONFIG_PPC_FPU_REGS
 		} else {
 			unsigned int fpidx = index - PT_FPR0;
 
@@ -113,6 +116,7 @@ long arch_ptrace(struct task_struct *child, long request,
 			else
 				child->thread.fp_state.fpscr = data;
 			ret = 0;
+#endif
 		}
 		break;
 	}
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index d15a98c758b8..18dcbf538f8f 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -133,7 +133,7 @@ unsigned long copy_ckvsx_from_user(struct task_struct *task,
 	return 0;
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-#else
+#elif defined(CONFIG_PPC_FPU_REGS)
 inline unsigned long copy_fpr_to_user(void __user *to,
 				      struct task_struct *task)
 {
@@ -163,6 +163,16 @@ inline unsigned long copy_ckfpr_from_user(struct task_struct *task,
 				ELF_NFPREG * sizeof(double));
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#else
+inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task)
+{
+	return 0;
+}
+
+inline unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from)
+{
+	return 0;
+}
 #endif
 
 /* Log an error when sending an unhandled signal to a process. Controlled
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 96950f189b5a..7b291707eb31 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -814,7 +814,9 @@ int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
 	}
 	regs->link = tramp;
 
+#ifdef CONFIG_PPC_FPU_REGS
 	tsk->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
+#endif
 
 	/* create a stack frame for the caller of the handler */
 	newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
@@ -1271,7 +1273,9 @@ int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
 
 	regs->link = tramp;
 
+#ifdef CONFIG_PPC_FPU_REGS
 	tsk->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
+#endif
 
 	/* create a stack frame for the caller of the handler */
 	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index d1ebe152f210..ee9ec61e75b7 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1188,6 +1188,7 @@ static inline int __parse_fpscr(unsigned long fpscr)
 	return ret;
 }
 
+#ifdef CONFIG_PPC_FPU
 static void parse_fpe(struct pt_regs *regs)
 {
 	int code = 0;
@@ -1198,6 +1199,7 @@ static void parse_fpe(struct pt_regs *regs)
 
 	_exception(SIGFPE, regs, code, regs->nip);
 }
+#endif
 
 /*
  * Illegal instruction emulation support.  Originally written to
@@ -1477,11 +1479,13 @@ void program_check_exception(struct pt_regs *regs)
 	/* We can now get here via a FP Unavailable exception if the core
 	 * has no FPU, in that case the reason flags will be 0 */
 
+#ifdef CONFIG_PPC_FPU
 	if (reason & REASON_FP) {
 		/* IEEE FP exception */
 		parse_fpe(regs);
 		goto bail;
 	}
+#endif
 	if (reason & REASON_TRAP) {
 		unsigned long bugaddr;
 		/* Debugger is first in line to stop recursive faults in
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 87737ec86d39..40ffcdba42b8 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -225,9 +225,13 @@ config PPC_E500MC
 	  such as e5500/e6500), and must be disabled for running on
 	  e500v1 or e500v2.
 
+config PPC_FPU_REGS
+	bool
+
 config PPC_FPU
 	bool
 	default y if PPC64
+	select PPC_FPU_REGS
 
 config FSL_EMB_PERFMON
 	bool "Freescale Embedded Perfmon"
-- 
2.25.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH v1] power: don't manage floating point regs when no FPU
  2020-08-07 16:13 [RFC PATCH v1] power: don't manage floating point regs when no FPU Christophe Leroy
@ 2020-08-11 12:07 ` Michael Ellerman
  2020-08-11 13:48   ` Christophe Leroy
  2020-08-11 14:06   ` Christophe Leroy
  0 siblings, 2 replies; 4+ messages in thread
From: Michael Ellerman @ 2020-08-11 12:07 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras
  Cc: linux-kernel, linuxppc-dev

Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> There is no point in copying floating point regs when there
> is no FPU and MATH_EMULATION is not selected.

Yeah I guess you're right. I've never touched a system with neither, but
if such a thing exists then it does seem silly to copy regs around that
can't be used.

> Create a new CONFIG_PPC_FPU_REGS bool that is selected by
> CONFIG_MATH_EMULATION and CONFIG_PPC_FPU, and use it to
> opt out everything related to fp_state in thread_struct.
>
> The following app runs in approx 10.50 seconds on an 8xx without
> the patch, and in 9.45 seconds with the patch.
>
> 	void sigusr1(int sig) { }
>
> 	int main(int argc, char **argv)
> 	{
> 		int i = 100000;
>
> 		signal(SIGUSR1, sigusr1);
> 		for (;i--;)
> 			raise(SIGUSR1);
> 		exit(0);
> 	}
>
> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
> ---
>  arch/powerpc/Kconfig                      |  1 +
>  arch/powerpc/include/asm/processor.h      |  2 ++
>  arch/powerpc/kernel/asm-offsets.c         |  2 ++
>  arch/powerpc/kernel/process.c             |  4 ++++
>  arch/powerpc/kernel/ptrace/ptrace-novsx.c |  8 ++++++++
>  arch/powerpc/kernel/ptrace/ptrace.c       |  4 ++++
>  arch/powerpc/kernel/signal.c              | 12 +++++++++++-
>  arch/powerpc/kernel/signal_32.c           |  4 ++++
>  arch/powerpc/kernel/traps.c               |  4 ++++
>  arch/powerpc/platforms/Kconfig.cputype    |  4 ++++
>  10 files changed, 44 insertions(+), 1 deletion(-)

In general this looks fine.

It's a bit #ifdef heavy. Maybe some of those can be cleaned up a bit
with some wrapper inlines?

> diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
> index b2dc4e92d11a..8f87a11f3f8c 100644
> --- a/arch/powerpc/kernel/ptrace/ptrace-novsx.c
> +++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
> @@ -28,6 +29,9 @@ int fpr_get(struct task_struct *target, const struct user_regset *regset,
>  
>  	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
>  				   &target->thread.fp_state, 0, -1);
> +#else
> +	return 0;
> +#endif

Should we return -ENODEV/EIO here? Wonder if another arch can give us a clue.

cheers

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH v1] power: don't manage floating point regs when no FPU
  2020-08-11 12:07 ` Michael Ellerman
@ 2020-08-11 13:48   ` Christophe Leroy
  2020-08-11 14:06   ` Christophe Leroy
  1 sibling, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2020-08-11 13:48 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
  Cc: linux-kernel, linuxppc-dev



Le 11/08/2020 à 14:07, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> There is no point in copying floating point regs when there
>> is no FPU and MATH_EMULATION is not selected.
> 
> Yeah I guess you're right. I've never touched a system with neither, but
> if such a thing exists then it does seem silly to copy regs around that
> can't be used.

Yes that exists, because glibc implements floating point emulation and 
it is definitely more efficient to rely on glibc emulation than on the kernel one.

>>   10 files changed, 44 insertions(+), 1 deletion(-)
> 
> In general this looks fine.
> 
> It's a bit #ifdef heavy. Maybe some of those can be cleaned up a bit
> with some wrapper inlines?
> 

Yes I'll try and respin, as part of a series I'm preparing to switch the 
32-bit signal code to use user_access_begin() logic and 
unsafe_put_user() and friends to reduce KUAP unlock/lock.

Christophe

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH v1] power: don't manage floating point regs when no FPU
  2020-08-11 12:07 ` Michael Ellerman
  2020-08-11 13:48   ` Christophe Leroy
@ 2020-08-11 14:06   ` Christophe Leroy
  1 sibling, 0 replies; 4+ messages in thread
From: Christophe Leroy @ 2020-08-11 14:06 UTC (permalink / raw)
  To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras
  Cc: linux-kernel, linuxppc-dev



Le 11/08/2020 à 14:07, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>>   10 files changed, 44 insertions(+), 1 deletion(-)
> 
> In general this looks fine.
> 
> It's a bit #ifdef heavy. Maybe some of those can be cleaned up a bit
> with some wrapper inlines?

Looking at it once more, it looks like more or less the same level of 
#ifdefs as things like CONFIG_ALTIVEC for instance. I can't really see 
many opportunities to clean it up.

> 
>> diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
>> index b2dc4e92d11a..8f87a11f3f8c 100644
>> --- a/arch/powerpc/kernel/ptrace/ptrace-novsx.c
>> +++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
>> @@ -28,6 +29,9 @@ int fpr_get(struct task_struct *target, const struct user_regset *regset,
>>   
>>   	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
>>   				   &target->thread.fp_state, 0, -1);
>> +#else
>> +	return 0;
>> +#endif
> 
> Should we return -ENODEV/EIO here? Wonder if another arch can give us a clue.
> 

Looks like we have to do it another way ... another #ifdef ... in the 
definition of native_regsets[] in ptrace-view.c. And then we should be 
able to not build ptrace-novsx.c at all. Will try that.

Christophe

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-08-11 14:06 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-07 16:13 [RFC PATCH v1] power: don't manage floating point regs when no FPU Christophe Leroy
2020-08-11 12:07 ` Michael Ellerman
2020-08-11 13:48   ` Christophe Leroy
2020-08-11 14:06   ` Christophe Leroy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).