linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] crypto/x86: Use XORL r32,r32 in poly1305-x86_64-cryptogams.pl
@ 2020-08-27 17:38 Uros Bizjak
  2020-09-01 19:16 ` Jason A. Donenfeld
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Uros Bizjak @ 2020-08-27 17:38 UTC (permalink / raw)
  To: linux-crypto, x86; +Cc: Uros Bizjak, Herbert Xu, David S. Miller

x86_64 zero-extends the result of 32-bit operations into the full
64-bit register, so for 64-bit operands XORL r32,r32 is functionally
equivalent to XORQ r64,r64, but avoids a REX prefix byte when legacy
registers are used.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
---
 arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 137edcf038cb..7d568012cc15 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
 ___
 &declare_function("poly1305_init_x86_64", 32, 3);
 $code.=<<___;
-	xor	%rax,%rax
+	xor	%eax,%eax
 	mov	%rax,0($ctx)		# initialize hash value
 	mov	%rax,8($ctx)
 	mov	%rax,16($ctx)
@@ -2853,7 +2853,7 @@ $code.=<<___;
 .type	poly1305_init_base2_44,\@function,3
 .align	32
 poly1305_init_base2_44:
-	xor	%rax,%rax
+	xor	%eax,%eax
 	mov	%rax,0($ctx)		# initialize hash value
 	mov	%rax,8($ctx)
 	mov	%rax,16($ctx)
@@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
 	mov	\$16,$len
 	sub	%r10,$len
 	xor	%eax,%eax
-	xor	%r11,%r11
+	xor	%r11d,%r11d
 .Loop_dec_byte:
 	mov	($inp,$otp),%r11b
 	mov	($otp),%al
@@ -4085,7 +4085,7 @@ avx_handler:
 	.long	0xa548f3fc		# cld; rep movsq
 
 	mov	$disp,%rsi
-	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
+	xor	%ecx,%ecx		# arg1, UNW_FLAG_NHANDLER
 	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
 	mov	0(%rsi),%r8		# arg3, disp->ControlPc
 	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
-- 
2.26.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto/x86: Use XORL r32,32 in poly1305-x86_64-cryptogams.pl
  2020-08-27 17:38 [PATCH] crypto/x86: Use XORL r32,32 in poly1305-x86_64-cryptogams.pl Uros Bizjak
@ 2020-09-01 19:16 ` Jason A. Donenfeld
  2020-09-02  5:52   ` Uros Bizjak
  2020-09-07 13:16 ` Jason A. Donenfeld
  2020-09-11  6:56 ` Herbert Xu
  2 siblings, 1 reply; 5+ messages in thread
From: Jason A. Donenfeld @ 2020-09-01 19:16 UTC (permalink / raw)
  To: Uros Bizjak, Andy Polyakov; +Cc: linux-crypto, x86, Herbert Xu, David S. Miller

Hi Uros,

Any benchmarks for this? Seems like it's all in initialization code,
right? I'm CC'ing Andy into this.

Jason

On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> x86_64 zero extends 32bit operations, so for 64bit operands,
> XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> a REX prefix byte when legacy registers are used.
> 
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: "David S. Miller" <davem@davemloft.net>
> ---
>  arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> index 137edcf038cb..7d568012cc15 100644
> --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> @@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
>  ___
>  &declare_function("poly1305_init_x86_64", 32, 3);
>  $code.=<<___;
> -	xor	%rax,%rax
> +	xor	%eax,%eax
>  	mov	%rax,0($ctx)		# initialize hash value
>  	mov	%rax,8($ctx)
>  	mov	%rax,16($ctx)
> @@ -2853,7 +2853,7 @@ $code.=<<___;
>  .type	poly1305_init_base2_44,\@function,3
>  .align	32
>  poly1305_init_base2_44:
> -	xor	%rax,%rax
> +	xor	%eax,%eax
>  	mov	%rax,0($ctx)		# initialize hash value
>  	mov	%rax,8($ctx)
>  	mov	%rax,16($ctx)
> @@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
>  	mov	\$16,$len
>  	sub	%r10,$len
>  	xor	%eax,%eax
> -	xor	%r11,%r11
> +	xor	%r11d,%r11d
>  .Loop_dec_byte:
>  	mov	($inp,$otp),%r11b
>  	mov	($otp),%al
> @@ -4085,7 +4085,7 @@ avx_handler:
>  	.long	0xa548f3fc		# cld; rep movsq
>  
>  	mov	$disp,%rsi
> -	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
> +	xor	%ecx,%ecx		# arg1, UNW_FLAG_NHANDLER
>  	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
>  	mov	0(%rsi),%r8		# arg3, disp->ControlPc
>  	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
> -- 
> 2.26.2
> 

-- 
Jason A. Donenfeld
Deep Space Explorer
fr: +33 6 51 90 82 66
us: +1 513 476 1200
www.jasondonenfeld.com
www.zx2c4.com
zx2c4.com/keys/AB9942E6D4A4CFC3412620A749FC7012A5DE03AE.asc

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto/x86: Use XORL r32,32 in poly1305-x86_64-cryptogams.pl
  2020-09-01 19:16 ` Jason A. Donenfeld
@ 2020-09-02  5:52   ` Uros Bizjak
  0 siblings, 0 replies; 5+ messages in thread
From: Uros Bizjak @ 2020-09-02  5:52 UTC (permalink / raw)
  To: Jason A. Donenfeld
  Cc: Andy Polyakov, Linux Crypto List, X86 ML, Herbert Xu, David S. Miller

On Tue, Sep 1, 2020 at 9:16 PM Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>
> Hi Uros,
>
> Any benchmarks for this? Seems like it's all in initialization code,
> right? I'm CC'ing Andy into this.

This patch should have no performance effect; it saves a REX prefix
byte when the optimization is applied to legacy registers.

Uros.

> Jason
>
> On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> > x86_64 zero extends 32bit operations, so for 64bit operands,
> > XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> > a REX prefix byte when legacy registers are used.
> >
> > Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> > Cc: Herbert Xu <herbert@gondor.apana.org.au>
> > Cc: "David S. Miller" <davem@davemloft.net>
> > ---
> >  arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
> >  1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> > index 137edcf038cb..7d568012cc15 100644
> > --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> > +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> > @@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
> >  ___
> >  &declare_function("poly1305_init_x86_64", 32, 3);
> >  $code.=<<___;
> > -     xor     %rax,%rax
> > +     xor     %eax,%eax
> >       mov     %rax,0($ctx)            # initialize hash value
> >       mov     %rax,8($ctx)
> >       mov     %rax,16($ctx)
> > @@ -2853,7 +2853,7 @@ $code.=<<___;
> >  .type        poly1305_init_base2_44,\@function,3
> >  .align       32
> >  poly1305_init_base2_44:
> > -     xor     %rax,%rax
> > +     xor     %eax,%eax
> >       mov     %rax,0($ctx)            # initialize hash value
> >       mov     %rax,8($ctx)
> >       mov     %rax,16($ctx)
> > @@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
> >       mov     \$16,$len
> >       sub     %r10,$len
> >       xor     %eax,%eax
> > -     xor     %r11,%r11
> > +     xor     %r11d,%r11d
> >  .Loop_dec_byte:
> >       mov     ($inp,$otp),%r11b
> >       mov     ($otp),%al
> > @@ -4085,7 +4085,7 @@ avx_handler:
> >       .long   0xa548f3fc              # cld; rep movsq
> >
> >       mov     $disp,%rsi
> > -     xor     %rcx,%rcx               # arg1, UNW_FLAG_NHANDLER
> > +     xor     %ecx,%ecx               # arg1, UNW_FLAG_NHANDLER
> >       mov     8(%rsi),%rdx            # arg2, disp->ImageBase
> >       mov     0(%rsi),%r8             # arg3, disp->ControlPc
> >       mov     16(%rsi),%r9            # arg4, disp->FunctionEntry
> > --
> > 2.26.2
> >
>
> --
> Jason A. Donenfeld
> Deep Space Explorer
> fr: +33 6 51 90 82 66
> us: +1 513 476 1200
> www.jasondonenfeld.com
> www.zx2c4.com
> zx2c4.com/keys/AB9942E6D4A4CFC3412620A749FC7012A5DE03AE.asc

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto/x86: Use XORL r32,32 in poly1305-x86_64-cryptogams.pl
  2020-08-27 17:38 [PATCH] crypto/x86: Use XORL r32,32 in poly1305-x86_64-cryptogams.pl Uros Bizjak
  2020-09-01 19:16 ` Jason A. Donenfeld
@ 2020-09-07 13:16 ` Jason A. Donenfeld
  2020-09-11  6:56 ` Herbert Xu
  2 siblings, 0 replies; 5+ messages in thread
From: Jason A. Donenfeld @ 2020-09-07 13:16 UTC (permalink / raw)
  To: Uros Bizjak, herbert; +Cc: linux-crypto, x86, Herbert Xu, David S. Miller

Hi Uros, Herbert,

On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> x86_64 zero extends 32bit operations, so for 64bit operands,
> XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> a REX prefix byte when legacy registers are used.
> 
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: "David S. Miller" <davem@davemloft.net>
> ---
>  arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> index 137edcf038cb..7d568012cc15 100644
> --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
> @@ -246,7 +246,7 @@ $code.=<<___ if (!$kernel);
>  ___
>  &declare_function("poly1305_init_x86_64", 32, 3);
>  $code.=<<___;
> -	xor	%rax,%rax
> +	xor	%eax,%eax
>  	mov	%rax,0($ctx)		# initialize hash value
>  	mov	%rax,8($ctx)
>  	mov	%rax,16($ctx)
> @@ -2853,7 +2853,7 @@ $code.=<<___;
>  .type	poly1305_init_base2_44,\@function,3
>  .align	32
>  poly1305_init_base2_44:
> -	xor	%rax,%rax
> +	xor	%eax,%eax
>  	mov	%rax,0($ctx)		# initialize hash value
>  	mov	%rax,8($ctx)
>  	mov	%rax,16($ctx)
> @@ -3947,7 +3947,7 @@ xor128_decrypt_n_pad:
>  	mov	\$16,$len
>  	sub	%r10,$len
>  	xor	%eax,%eax
> -	xor	%r11,%r11
> +	xor	%r11d,%r11d
>  .Loop_dec_byte:
>  	mov	($inp,$otp),%r11b
>  	mov	($otp),%al
> @@ -4085,7 +4085,7 @@ avx_handler:
>  	.long	0xa548f3fc		# cld; rep movsq
>  
>  	mov	$disp,%rsi
> -	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER
> +	xor	%ecx,%ecx		# arg1, UNW_FLAG_NHANDLER
>  	mov	8(%rsi),%rdx		# arg2, disp->ImageBase
>  	mov	0(%rsi),%r8		# arg3, disp->ControlPc
>  	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry
> -- 
> 2.26.2
> 

Per the discussion elsewhere,

Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>

for cryptodev-2.6.git, rather than crypto-2.6.git

Thanks,
Jason

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] crypto/x86: Use XORL r32,32 in poly1305-x86_64-cryptogams.pl
  2020-08-27 17:38 [PATCH] crypto/x86: Use XORL r32,32 in poly1305-x86_64-cryptogams.pl Uros Bizjak
  2020-09-01 19:16 ` Jason A. Donenfeld
  2020-09-07 13:16 ` Jason A. Donenfeld
@ 2020-09-11  6:56 ` Herbert Xu
  2 siblings, 0 replies; 5+ messages in thread
From: Herbert Xu @ 2020-09-11  6:56 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: linux-crypto, x86, David S. Miller

On Thu, Aug 27, 2020 at 07:38:31PM +0200, Uros Bizjak wrote:
> x86_64 zero extends 32bit operations, so for 64bit operands,
> XORL r32,r32 is functionally equal to XORQ r64,r64, but avoids
> a REX prefix byte when legacy registers are used.
> 
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: "David S. Miller" <davem@davemloft.net>
> ---
>  arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)

Patch applied.  Thanks.
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-09-11  6:56 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-27 17:38 [PATCH] crypto/x86: Use XORL r32,32 in poly1305-x86_64-cryptogams.pl Uros Bizjak
2020-09-01 19:16 ` Jason A. Donenfeld
2020-09-02  5:52   ` Uros Bizjak
2020-09-07 13:16 ` Jason A. Donenfeld
2020-09-11  6:56 ` Herbert Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).