All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] powerpc: Remove empty giveup_altivec function on book3e CPUs
@ 2012-04-16  6:54 Anton Blanchard
  2012-04-16  6:56 ` [PATCH 2/2] powerpc: Optimise enable_kernel_altivec Anton Blanchard
  0 siblings, 1 reply; 2+ messages in thread
From: Anton Blanchard @ 2012-04-16  6:54 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev


Use an empty inline instead of an empty function to implement
giveup_altivec on book3e CPUs, similar to flush_altivec_to_thread.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux-build/arch/powerpc/include/asm/switch_to.h
===================================================================
--- linux-build.orig/arch/powerpc/include/asm/switch_to.h	2012-04-16 12:56:11.000000000 +1000
+++ linux-build/arch/powerpc/include/asm/switch_to.h	2012-04-16 12:56:45.149313158 +1000
@@ -21,7 +21,6 @@ extern void disable_kernel_fp(void);
 extern void enable_kernel_fp(void);
 extern void flush_fp_to_thread(struct task_struct *);
 extern void enable_kernel_altivec(void);
-extern void giveup_altivec(struct task_struct *);
 extern void load_up_altivec(struct task_struct *);
 extern int emulate_altivec(struct pt_regs *);
 extern void __giveup_vsx(struct task_struct *);
@@ -40,10 +39,14 @@ static inline void discard_lazy_cpu_stat
 
 #ifdef CONFIG_ALTIVEC
 extern void flush_altivec_to_thread(struct task_struct *);
+extern void giveup_altivec(struct task_struct *);
 #else
 static inline void flush_altivec_to_thread(struct task_struct *t)
 {
 }
+static inline void giveup_altivec(struct task_struct *t)
+{
+}
 #endif
 
 #ifdef CONFIG_VSX
Index: linux-build/arch/powerpc/kernel/head_44x.S
===================================================================
--- linux-build.orig/arch/powerpc/kernel/head_44x.S	2012-04-16 12:56:11.976708702 +1000
+++ linux-build/arch/powerpc/kernel/head_44x.S	2012-04-16 12:56:45.153313231 +1000
@@ -778,14 +778,6 @@ _GLOBAL(__fixup_440A_mcheck)
 	blr
 
 /*
- * extern void giveup_altivec(struct task_struct *prev)
- *
- * The 44x core does not have an AltiVec unit.
- */
-_GLOBAL(giveup_altivec)
-	blr
-
-/*
  * extern void giveup_fpu(struct task_struct *prev)
  *
  * The 44x core does not have an FPU.
Index: linux-build/arch/powerpc/kernel/head_fsl_booke.S
===================================================================
--- linux-build.orig/arch/powerpc/kernel/head_fsl_booke.S	2012-04-16 12:56:11.940708046 +1000
+++ linux-build/arch/powerpc/kernel/head_fsl_booke.S	2012-04-16 12:56:45.153313231 +1000
@@ -874,14 +874,6 @@ _GLOBAL(__setup_e500mc_ivors)
 	sync
 	blr
 
-/*
- * extern void giveup_altivec(struct task_struct *prev)
- *
- * The e500 core does not have an AltiVec unit.
- */
-_GLOBAL(giveup_altivec)
-	blr
-
 #ifdef CONFIG_SPE
 /*
  * extern void giveup_spe(struct task_struct *prev)

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [PATCH 2/2] powerpc: Optimise enable_kernel_altivec
  2012-04-16  6:54 [PATCH 1/2] powerpc: Remove empty giveup_altivec function on book3e CPUs Anton Blanchard
@ 2012-04-16  6:56 ` Anton Blanchard
  0 siblings, 0 replies; 2+ messages in thread
From: Anton Blanchard @ 2012-04-16  6:56 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev


Add two optimisations to enable_kernel_altivec:

- enable_kernel_altivec has already determined if we need to
save the previous task's state but we call giveup_altivec
in both cases, requiring an extra branch in giveup_altivec. Create
giveup_altivec_notask which only turns on the VMX bit in the
MSR.

- We write the VMX MSR bit each time we call enable_kernel_altivec
even if it was already set. Check the bit and branch out if we have
already set it. The classic case for this is vectored IO
where we have to copy multiple buffers to or from userspace.

The following testcase was used to confirm this patch improves
performance:

http://ozlabs.org/~anton/junkcode/copy_to_user.c

Since the current breakpoint for using VMX in copy_tofrom_user is
4096 bytes, I'm using buffers of 4096 + 1 cacheline (4224) bytes.
A benchmark of 16-entry readvs (-s 16):

time copy_to_user -l 4224 -s 16 -i 1000000

completes 5.2% faster on a POWER7 PS700.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux-build/arch/powerpc/kernel/process.c
===================================================================
--- linux-build.orig/arch/powerpc/kernel/process.c	2012-04-16 11:35:19.000000000 +1000
+++ linux-build/arch/powerpc/kernel/process.c	2012-04-16 12:56:47.489355793 +1000
@@ -124,7 +124,7 @@ void enable_kernel_altivec(void)
 	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
 		giveup_altivec(current);
 	else
-		giveup_altivec(NULL);	/* just enable AltiVec for kernel - force */
+		giveup_altivec_notask();
 #else
 	giveup_altivec(last_task_used_altivec);
 #endif /* CONFIG_SMP */
Index: linux-build/arch/powerpc/kernel/vector.S
===================================================================
--- linux-build.orig/arch/powerpc/kernel/vector.S	2012-04-12 20:06:21.000000000 +1000
+++ linux-build/arch/powerpc/kernel/vector.S	2012-04-16 12:56:47.489355793 +1000
@@ -89,6 +89,16 @@ _GLOBAL(load_up_altivec)
 	/* restore registers and return */
 	blr
 
+_GLOBAL(giveup_altivec_notask)
+	mfmsr	r3
+	andis.	r4,r3,MSR_VEC@h
+	bnelr				/* Already enabled? */
+	oris	r3,r3,MSR_VEC@h
+	SYNC
+	MTMSRD(r3)			/* enable use of VMX now */
+	isync
+	blr
+
 /*
  * giveup_altivec(tsk)
  * Disable VMX for the task given as the argument,
Index: linux-build/arch/powerpc/include/asm/switch_to.h
===================================================================
--- linux-build.orig/arch/powerpc/include/asm/switch_to.h	2012-04-16 12:56:45.149313158 +1000
+++ linux-build/arch/powerpc/include/asm/switch_to.h	2012-04-16 12:56:47.489355793 +1000
@@ -40,6 +40,7 @@ static inline void discard_lazy_cpu_stat
 #ifdef CONFIG_ALTIVEC
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
+extern void giveup_altivec_notask(void);
 #else
 static inline void flush_altivec_to_thread(struct task_struct *t)
 {

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2012-04-16  6:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-16  6:54 [PATCH 1/2] powerpc: Remove empty giveup_altivec function on book3e CPUs Anton Blanchard
2012-04-16  6:56 ` [PATCH 2/2] powerpc: Optimise enable_kernel_altivec Anton Blanchard

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.