All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] powerpc: Remove assembly versions of strcpy, strcat, strlen and strcmp
@ 2016-05-25 22:38 Anton Blanchard
  2016-05-25 22:39 ` [PATCH 2/2] powerpc: Align hot loops of some string functions Anton Blanchard
  2016-06-15 12:39 ` [1/2] powerpc: Remove assembly versions of strcpy, strcat, strlen and strcmp Michael Ellerman
  0 siblings, 2 replies; 8+ messages in thread
From: Anton Blanchard @ 2016-05-25 22:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Michael Ellerman, Paul Mackerras, acsawdey
  Cc: linuxppc-dev

A number of our assembly implementations of string functions do not
align their hot loops. I was going to align them manually, but I
realised that they are almost instruction for instruction
identical to what gcc produces, with the advantage that gcc does
align them.

In light of that, let's just remove the assembly versions.

Signed-off-by: Anton Blanchard <anton@samba.org>
---

index e40010a..da3cdff 100644
Index: linux.junk/arch/powerpc/include/asm/string.h
===================================================================
--- linux.junk.orig/arch/powerpc/include/asm/string.h
+++ linux.junk/arch/powerpc/include/asm/string.h
@@ -3,12 +3,8 @@
 
 #ifdef __KERNEL__
 
-#define __HAVE_ARCH_STRCPY
 #define __HAVE_ARCH_STRNCPY
-#define __HAVE_ARCH_STRLEN
-#define __HAVE_ARCH_STRCMP
 #define __HAVE_ARCH_STRNCMP
-#define __HAVE_ARCH_STRCAT
 #define __HAVE_ARCH_MEMSET
 #define __HAVE_ARCH_MEMCPY
 #define __HAVE_ARCH_MEMMOVE
Index: linux.junk/arch/powerpc/lib/ppc_ksyms.c
===================================================================
--- linux.junk.orig/arch/powerpc/lib/ppc_ksyms.c
+++ linux.junk/arch/powerpc/lib/ppc_ksyms.c
@@ -9,11 +9,7 @@ EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcmp);
 EXPORT_SYMBOL(memchr);
 
-EXPORT_SYMBOL(strcpy);
 EXPORT_SYMBOL(strncpy);
-EXPORT_SYMBOL(strcat);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strcmp);
 EXPORT_SYMBOL(strncmp);
 
 #ifndef CONFIG_GENERIC_CSUM
Index: linux.junk/arch/powerpc/lib/string.S
===================================================================
--- linux.junk.orig/arch/powerpc/lib/string.S
+++ linux.junk/arch/powerpc/lib/string.S
@@ -16,15 +16,6 @@
 	PPC_LONG_ALIGN
 	.text
 	
-_GLOBAL(strcpy)
-	addi	r5,r3,-1
-	addi	r4,r4,-1
-1:	lbzu	r0,1(r4)
-	cmpwi	0,r0,0
-	stbu	r0,1(r5)
-	bne	1b
-	blr
-
 /* This clears out any unused part of the destination buffer,
    just as the libc version does.  -- paulus */
 _GLOBAL(strncpy)
@@ -45,30 +36,6 @@ _GLOBAL(strncpy)
 	bdnz	2b
 	blr
 
-_GLOBAL(strcat)
-	addi	r5,r3,-1
-	addi	r4,r4,-1
-1:	lbzu	r0,1(r5)
-	cmpwi	0,r0,0
-	bne	1b
-	addi	r5,r5,-1
-1:	lbzu	r0,1(r4)
-	cmpwi	0,r0,0
-	stbu	r0,1(r5)
-	bne	1b
-	blr
-
-_GLOBAL(strcmp)
-	addi	r5,r3,-1
-	addi	r4,r4,-1
-1:	lbzu	r3,1(r5)
-	cmpwi	1,r3,0
-	lbzu	r0,1(r4)
-	subf.	r3,r0,r3
-	beqlr	1
-	beq	1b
-	blr
-
 _GLOBAL(strncmp)
 	PPC_LCMPI 0,r5,0
 	beq-	2f
@@ -85,14 +52,6 @@ _GLOBAL(strncmp)
 2:	li	r3,0
 	blr
 
-_GLOBAL(strlen)
-	addi	r4,r3,-1
-1:	lbzu	r0,1(r4)
-	cmpwi	0,r0,0
-	bne	1b
-	subf	r3,r3,r4
-	blr
-
 #ifdef CONFIG_PPC32
 _GLOBAL(memcmp)
 	PPC_LCMPI 0,r5,0

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 2/2] powerpc: Align hot loops of some string functions
  2016-05-25 22:38 [PATCH 1/2] powerpc: Remove assembly versions of strcpy, strcat, strlen and strcmp Anton Blanchard
@ 2016-05-25 22:39 ` Anton Blanchard
  2016-05-26  7:24   ` Christophe Leroy
  2016-06-15 12:39   ` [2/2] " Michael Ellerman
  2016-06-15 12:39 ` [1/2] powerpc: Remove assembly versions of strcpy, strcat, strlen and strcmp Michael Ellerman
  1 sibling, 2 replies; 8+ messages in thread
From: Anton Blanchard @ 2016-05-25 22:39 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Michael Ellerman, Paul Mackerras, acsawdey
  Cc: linuxppc-dev

Align the hot loops in our assembly implementation of strncpy(),
strncmp() and memchr().

Signed-off-by: Anton Blanchard <anton@samba.org>
---

Index: linux.junk/arch/powerpc/lib/string.S
===================================================================
--- linux.junk.orig/arch/powerpc/lib/string.S
+++ linux.junk/arch/powerpc/lib/string.S
@@ -24,6 +24,7 @@ _GLOBAL(strncpy)
 	mtctr	r5
 	addi	r6,r3,-1
 	addi	r4,r4,-1
+	.balign 16
 1:	lbzu	r0,1(r4)
 	cmpwi	0,r0,0
 	stbu	r0,1(r6)
@@ -42,6 +43,7 @@ _GLOBAL(strncmp)
 	mtctr	r5
 	addi	r5,r3,-1
 	addi	r4,r4,-1
+	.balign 16
 1:	lbzu	r3,1(r5)
 	cmpwi	1,r3,0
 	lbzu	r0,1(r4)
@@ -73,6 +75,7 @@ _GLOBAL(memchr)
 	beq-	2f
 	mtctr	r5
 	addi	r3,r3,-1
+	.balign 16
 1:	lbzu	r0,1(r3)
 	cmpw	0,r0,r4
 	bdnzf	2,1b

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] powerpc: Align hot loops of some string functions
  2016-05-25 22:39 ` [PATCH 2/2] powerpc: Align hot loops of some string functions Anton Blanchard
@ 2016-05-26  7:24   ` Christophe Leroy
  2016-05-26 19:37     ` Segher Boessenkool
  2016-06-15 12:39   ` [2/2] " Michael Ellerman
  1 sibling, 1 reply; 8+ messages in thread
From: Christophe Leroy @ 2016-05-26  7:24 UTC (permalink / raw)
  To: Anton Blanchard, Benjamin Herrenschmidt, Michael Ellerman,
	Paul Mackerras, acsawdey
  Cc: linuxppc-dev



Le 26/05/2016 à 00:39, Anton Blanchard via Linuxppc-dev a écrit :
> Align the hot loops in our assembly implementation of strncpy(),
> strncmp() and memchr().
Wouldn't it be better to add nops before the function entry in order to 
get the hot loop aligned, instead of adding nops in the middle of the 
function ?

Christophe
>
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
>
> Index: linux.junk/arch/powerpc/lib/string.S
> ===================================================================
> --- linux.junk.orig/arch/powerpc/lib/string.S
> +++ linux.junk/arch/powerpc/lib/string.S
> @@ -24,6 +24,7 @@ _GLOBAL(strncpy)
>   	mtctr	r5
>   	addi	r6,r3,-1
>   	addi	r4,r4,-1
> +	.balign 16
>   1:	lbzu	r0,1(r4)
>   	cmpwi	0,r0,0
>   	stbu	r0,1(r6)
> @@ -42,6 +43,7 @@ _GLOBAL(strncmp)
>   	mtctr	r5
>   	addi	r5,r3,-1
>   	addi	r4,r4,-1
> +	.balign 16
>   1:	lbzu	r3,1(r5)
>   	cmpwi	1,r3,0
>   	lbzu	r0,1(r4)
> @@ -73,6 +75,7 @@ _GLOBAL(memchr)
>   	beq-	2f
>   	mtctr	r5
>   	addi	r3,r3,-1
> +	.balign 16
>   1:	lbzu	r0,1(r3)
>   	cmpw	0,r0,r4
>   	bdnzf	2,1b
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] powerpc: Align hot loops of some string functions
  2016-05-26  7:24   ` Christophe Leroy
@ 2016-05-26 19:37     ` Segher Boessenkool
  2016-05-27  5:45       ` Christophe Leroy
  0 siblings, 1 reply; 8+ messages in thread
From: Segher Boessenkool @ 2016-05-26 19:37 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Anton Blanchard, Benjamin Herrenschmidt, Michael Ellerman,
	Paul Mackerras, acsawdey, linuxppc-dev

On Thu, May 26, 2016 at 09:24:51AM +0200, Christophe Leroy wrote:
> Wouldn't it be better to add nops before the function entry in order to 
> get the hot loop aligned, instead of adding nops in the middle of the 
> function ?

Why would that be better?  The nops are executed once per function call
in either case, there are the same number of nops in either case, and
on most CPUs nops aren't actually executed anyway (they are decoded and
then thrown away).


Segher

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] powerpc: Align hot loops of some string functions
  2016-05-26 19:37     ` Segher Boessenkool
@ 2016-05-27  5:45       ` Christophe Leroy
  2016-05-27  6:26         ` Segher Boessenkool
  0 siblings, 1 reply; 8+ messages in thread
From: Christophe Leroy @ 2016-05-27  5:45 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Anton Blanchard, Benjamin Herrenschmidt, Michael Ellerman,
	Paul Mackerras, acsawdey, linuxppc-dev



Le 26/05/2016 à 21:37, Segher Boessenkool a écrit :
> On Thu, May 26, 2016 at 09:24:51AM +0200, Christophe Leroy wrote:
>> Wouldn't it be better to add nops before the function entry in order to
>> get the hot loop aligned, instead of adding nops in the middle of the
>> function ?
> Why would that be better?  The nops are executed once per function call
> in either case, there are the same number of nops in either case, and
> on most CPUs nops aren't actually executed anyway (they are decoded and
> then thrown away).
>
The idea was to not execute them:

	.balign 16
	nop
	nop
_GLOBAL(strcpy)
	addi	r5,r3,-1
	addi	r4,r4,-1
1:	lbzu	r0,1(r4)
	cmpwi	0,r0,0
	stbu	r0,1(r5)
	bne	1b
	blr

Christophe

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] powerpc: Align hot loops of some string functions
  2016-05-27  5:45       ` Christophe Leroy
@ 2016-05-27  6:26         ` Segher Boessenkool
  0 siblings, 0 replies; 8+ messages in thread
From: Segher Boessenkool @ 2016-05-27  6:26 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Anton Blanchard, Benjamin Herrenschmidt, Michael Ellerman,
	Paul Mackerras, acsawdey, linuxppc-dev

On Fri, May 27, 2016 at 07:45:18AM +0200, Christophe Leroy wrote:
> >>Wouldn't it be better to add nops before the function entry in order to
> >>get the hot loop aligned, instead of adding nops in the middle of the
> >>function ?
> >Why would that be better?  The nops are executed once per function call
> >in either case, there are the same number of nops in either case, and
> >on most CPUs nops aren't actually executed anyway (they are decoded and
> >then thrown away).
> >
> The idea was to not execute them:
> 
> 	.balign 16
> 	nop
> 	nop
> _GLOBAL(strcpy)
> 	addi	r5,r3,-1
> 	addi	r4,r4,-1
> 1:	lbzu	r0,1(r4)
> 	cmpwi	0,r0,0
> 	stbu	r0,1(r5)
> 	bne	1b
> 	blr

That performs _worse_ on most modern CPUs (the first decode will decode
less, so instructions are available for execution later).  That's why
functions are aligned in the first place!


Segher

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [1/2] powerpc: Remove assembly versions of strcpy, strcat, strlen and strcmp
  2016-05-25 22:38 [PATCH 1/2] powerpc: Remove assembly versions of strcpy, strcat, strlen and strcmp Anton Blanchard
  2016-05-25 22:39 ` [PATCH 2/2] powerpc: Align hot loops of some string functions Anton Blanchard
@ 2016-06-15 12:39 ` Michael Ellerman
  1 sibling, 0 replies; 8+ messages in thread
From: Michael Ellerman @ 2016-06-15 12:39 UTC (permalink / raw)
  To: Unknown sender due to SPF, Benjamin Herrenschmidt,
	Paul Mackerras, acsawdey
  Cc: linuxppc-dev

On Wed, 2016-25-05 at 22:38:13 UTC, Unknown sender due to SPF wrote:
> A number of our assembly implementations of string functions do not
> align their hot loops. I was going to align them manually, but I
> realised that they are almost instruction for instruction
> identical to what gcc produces, with the advantage that gcc does
> align them.
> 
> In light of that, let's just remove the assembly versions.
> 
> Signed-off-by: Anton Blanchard <anton@samba.org>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/3ece16632b64120df2ef566ce3

cheers

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [2/2] powerpc: Align hot loops of some string functions
  2016-05-25 22:39 ` [PATCH 2/2] powerpc: Align hot loops of some string functions Anton Blanchard
  2016-05-26  7:24   ` Christophe Leroy
@ 2016-06-15 12:39   ` Michael Ellerman
  1 sibling, 0 replies; 8+ messages in thread
From: Michael Ellerman @ 2016-06-15 12:39 UTC (permalink / raw)
  To: Unknown sender due to SPF, Benjamin Herrenschmidt,
	Paul Mackerras, acsawdey
  Cc: linuxppc-dev

On Wed, 2016-25-05 at 22:39:55 UTC, Unknown sender due to SPF wrote:
> Align the hot loops in our assembly implementation of strncpy(),
> strncmp() and memchr().
> 
> Signed-off-by: Anton Blanchard <anton@samba.org>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/87a156fb18fe15d012c3db506b

cheers

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2016-06-15 12:39 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-25 22:38 [PATCH 1/2] powerpc: Remove assembly versions of strcpy, strcat, strlen and strcmp Anton Blanchard
2016-05-25 22:39 ` [PATCH 2/2] powerpc: Align hot loops of some string functions Anton Blanchard
2016-05-26  7:24   ` Christophe Leroy
2016-05-26 19:37     ` Segher Boessenkool
2016-05-27  5:45       ` Christophe Leroy
2016-05-27  6:26         ` Segher Boessenkool
2016-06-15 12:39   ` [2/2] " Michael Ellerman
2016-06-15 12:39 ` [1/2] powerpc: Remove assembly versions of strcpy, strcat, strlen and strcmp Michael Ellerman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.