All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	"Chang S . Bae" <chang.seok.bae@intel.com>
Subject: [PATCH] crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec()
Date: Fri, 12 Apr 2024 17:09:47 -0700	[thread overview]
Message-ID: <20240413000947.67988-1-ebiggers@kernel.org> (raw)

From: Eric Biggers <ebiggers@google.com>

Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate
them from a macro that's passed an argument enc=1 or enc=0.  This
reduces the length of aesni-intel_asm.S by 112 lines while still
producing the exact same object file in both 32-bit and 64-bit mode.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/aesni-intel_asm.S | 270 +++++++++---------------------
 1 file changed, 79 insertions(+), 191 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 1cb55eea2efa..3a3e46188dec 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2823,32 +2823,28 @@ SYM_FUNC_END(aesni_ctr_enc)
 .Lgf128mul_x_ble_mask:
 	.octa 0x00000000000000010000000000000087
 .previous
 
 /*
- * _aesni_gf128mul_x_ble:		internal ABI
- *	Multiply in GF(2^128) for XTS IVs
+ * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
  * input:
  *	IV:	current IV
  *	GF128MUL_MASK == mask with 0x87 and 0x01
  * output:
  *	IV:	next IV
  * changed:
- *	CTR:	== temporary value
+ *	KEY:	== temporary value
  */
-#define _aesni_gf128mul_x_ble() \
-	pshufd $0x13, IV, KEY; \
-	paddq IV, IV; \
-	psrad $31, KEY; \
-	pand GF128MUL_MASK, KEY; \
-	pxor KEY, IV;
+.macro _aesni_gf128mul_x_ble
+	pshufd $0x13, IV, KEY
+	paddq IV, IV
+	psrad $31, KEY
+	pand GF128MUL_MASK, KEY
+	pxor KEY, IV
+.endm
 
-/*
- * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
- *		      const u8 *src, unsigned int len, le128 *iv)
- */
-SYM_FUNC_START(aesni_xts_enc)
+.macro	_aesni_xts_crypt	enc
 	FRAME_BEGIN
 #ifndef __x86_64__
 	pushl IVP
 	pushl LEN
 	pushl KEYP
@@ -2863,39 +2859,50 @@ SYM_FUNC_START(aesni_xts_enc)
 	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
 #endif
 	movups (IVP), IV
 
 	mov 480(KEYP), KLEN
+.if !\enc
+	add $240, KEYP
+
+	test $15, LEN
+	jz .Lxts_loop4\@
+	sub $16, LEN
+.endif
 
-.Lxts_enc_loop4:
+.Lxts_loop4\@:
 	sub $64, LEN
-	jl .Lxts_enc_1x
+	jl .Lxts_1x\@
 
 	movdqa IV, STATE1
 	movdqu 0x00(INP), IN
 	pxor IN, STATE1
 	movdqu IV, 0x00(OUTP)
 
-	_aesni_gf128mul_x_ble()
+	_aesni_gf128mul_x_ble
 	movdqa IV, STATE2
 	movdqu 0x10(INP), IN
 	pxor IN, STATE2
 	movdqu IV, 0x10(OUTP)
 
-	_aesni_gf128mul_x_ble()
+	_aesni_gf128mul_x_ble
 	movdqa IV, STATE3
 	movdqu 0x20(INP), IN
 	pxor IN, STATE3
 	movdqu IV, 0x20(OUTP)
 
-	_aesni_gf128mul_x_ble()
+	_aesni_gf128mul_x_ble
 	movdqa IV, STATE4
 	movdqu 0x30(INP), IN
 	pxor IN, STATE4
 	movdqu IV, 0x30(OUTP)
 
+.if \enc
 	call _aesni_enc4
+.else
+	call _aesni_dec4
+.endif
 
 	movdqu 0x00(OUTP), IN
 	pxor IN, STATE1
 	movdqu STATE1, 0x00(OUTP)
 
@@ -2909,63 +2916,84 @@ SYM_FUNC_START(aesni_xts_enc)
 
 	movdqu 0x30(OUTP), IN
 	pxor IN, STATE4
 	movdqu STATE4, 0x30(OUTP)
 
-	_aesni_gf128mul_x_ble()
+	_aesni_gf128mul_x_ble
 
 	add $64, INP
 	add $64, OUTP
 	test LEN, LEN
-	jnz .Lxts_enc_loop4
+	jnz .Lxts_loop4\@
 
-.Lxts_enc_ret_iv:
+.Lxts_ret_iv\@:
 	movups IV, (IVP)
 
-.Lxts_enc_ret:
+.Lxts_ret\@:
 #ifndef __x86_64__
 	popl KLEN
 	popl KEYP
 	popl LEN
 	popl IVP
 #endif
 	FRAME_END
 	RET
 
-.Lxts_enc_1x:
+.Lxts_1x\@:
 	add $64, LEN
-	jz .Lxts_enc_ret_iv
+	jz .Lxts_ret_iv\@
+.if \enc
 	sub $16, LEN
-	jl .Lxts_enc_cts4
+	jl .Lxts_cts4\@
+.endif
 
-.Lxts_enc_loop1:
+.Lxts_loop1\@:
 	movdqu (INP), STATE
+.if \enc
 	pxor IV, STATE
 	call _aesni_enc1
+.else
+	add $16, INP
+	sub $16, LEN
+	jl .Lxts_cts1\@
 	pxor IV, STATE
-	_aesni_gf128mul_x_ble()
+	call _aesni_dec1
+.endif
+	pxor IV, STATE
+	_aesni_gf128mul_x_ble
 
 	test LEN, LEN
-	jz .Lxts_enc_out
+	jz .Lxts_out\@
 
+.if \enc
 	add $16, INP
 	sub $16, LEN
-	jl .Lxts_enc_cts1
+	jl .Lxts_cts1\@
+.endif
 
 	movdqu STATE, (OUTP)
 	add $16, OUTP
-	jmp .Lxts_enc_loop1
+	jmp .Lxts_loop1\@
 
-.Lxts_enc_out:
+.Lxts_out\@:
 	movdqu STATE, (OUTP)
-	jmp .Lxts_enc_ret_iv
+	jmp .Lxts_ret_iv\@
 
-.Lxts_enc_cts4:
+.if \enc
+.Lxts_cts4\@:
 	movdqa STATE4, STATE
 	sub $16, OUTP
+.Lxts_cts1\@:
+.else
+.Lxts_cts1\@:
+	movdqa IV, STATE4
+	_aesni_gf128mul_x_ble
 
-.Lxts_enc_cts1:
+	pxor IV, STATE
+	call _aesni_dec1
+	pxor IV, STATE
+.endif
 #ifndef __x86_64__
 	lea .Lcts_permute_table, T1
 #else
 	lea .Lcts_permute_table(%rip), T1
 #endif
@@ -2987,174 +3015,34 @@ SYM_FUNC_START(aesni_xts_enc)
 	movups (IVP), %xmm0
 	pshufb %xmm0, IN1
 	pblendvb IN2, IN1
 	movaps IN1, STATE
 
+.if \enc
 	pxor IV, STATE
 	call _aesni_enc1
 	pxor IV, STATE
+.else
+	pxor STATE4, STATE
+	call _aesni_dec1
+	pxor STATE4, STATE
+.endif
 
 	movups STATE, (OUTP)
-	jmp .Lxts_enc_ret
+	jmp .Lxts_ret\@
+.endm
+
+/*
+ * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
+ *		      const u8 *src, unsigned int len, le128 *iv)
+ */
+SYM_FUNC_START(aesni_xts_enc)
+	_aesni_xts_crypt	1
 SYM_FUNC_END(aesni_xts_enc)
 
 /*
  * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
  *		      const u8 *src, unsigned int len, le128 *iv)
  */
 SYM_FUNC_START(aesni_xts_dec)
-	FRAME_BEGIN
-#ifndef __x86_64__
-	pushl IVP
-	pushl LEN
-	pushl KEYP
-	pushl KLEN
-	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
-	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
-	movl (FRAME_OFFSET+28)(%esp), INP	# src
-	movl (FRAME_OFFSET+32)(%esp), LEN	# len
-	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
-	movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
-#else
-	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
-#endif
-	movups (IVP), IV
-
-	mov 480(KEYP), KLEN
-	add $240, KEYP
-
-	test $15, LEN
-	jz .Lxts_dec_loop4
-	sub $16, LEN
-
-.Lxts_dec_loop4:
-	sub $64, LEN
-	jl .Lxts_dec_1x
-
-	movdqa IV, STATE1
-	movdqu 0x00(INP), IN
-	pxor IN, STATE1
-	movdqu IV, 0x00(OUTP)
-
-	_aesni_gf128mul_x_ble()
-	movdqa IV, STATE2
-	movdqu 0x10(INP), IN
-	pxor IN, STATE2
-	movdqu IV, 0x10(OUTP)
-
-	_aesni_gf128mul_x_ble()
-	movdqa IV, STATE3
-	movdqu 0x20(INP), IN
-	pxor IN, STATE3
-	movdqu IV, 0x20(OUTP)
-
-	_aesni_gf128mul_x_ble()
-	movdqa IV, STATE4
-	movdqu 0x30(INP), IN
-	pxor IN, STATE4
-	movdqu IV, 0x30(OUTP)
-
-	call _aesni_dec4
-
-	movdqu 0x00(OUTP), IN
-	pxor IN, STATE1
-	movdqu STATE1, 0x00(OUTP)
-
-	movdqu 0x10(OUTP), IN
-	pxor IN, STATE2
-	movdqu STATE2, 0x10(OUTP)
-
-	movdqu 0x20(OUTP), IN
-	pxor IN, STATE3
-	movdqu STATE3, 0x20(OUTP)
-
-	movdqu 0x30(OUTP), IN
-	pxor IN, STATE4
-	movdqu STATE4, 0x30(OUTP)
-
-	_aesni_gf128mul_x_ble()
-
-	add $64, INP
-	add $64, OUTP
-	test LEN, LEN
-	jnz .Lxts_dec_loop4
-
-.Lxts_dec_ret_iv:
-	movups IV, (IVP)
-
-.Lxts_dec_ret:
-#ifndef __x86_64__
-	popl KLEN
-	popl KEYP
-	popl LEN
-	popl IVP
-#endif
-	FRAME_END
-	RET
-
-.Lxts_dec_1x:
-	add $64, LEN
-	jz .Lxts_dec_ret_iv
-
-.Lxts_dec_loop1:
-	movdqu (INP), STATE
-
-	add $16, INP
-	sub $16, LEN
-	jl .Lxts_dec_cts1
-
-	pxor IV, STATE
-	call _aesni_dec1
-	pxor IV, STATE
-	_aesni_gf128mul_x_ble()
-
-	test LEN, LEN
-	jz .Lxts_dec_out
-
-	movdqu STATE, (OUTP)
-	add $16, OUTP
-	jmp .Lxts_dec_loop1
-
-.Lxts_dec_out:
-	movdqu STATE, (OUTP)
-	jmp .Lxts_dec_ret_iv
-
-.Lxts_dec_cts1:
-	movdqa IV, STATE4
-	_aesni_gf128mul_x_ble()
-
-	pxor IV, STATE
-	call _aesni_dec1
-	pxor IV, STATE
-
-#ifndef __x86_64__
-	lea .Lcts_permute_table, T1
-#else
-	lea .Lcts_permute_table(%rip), T1
-#endif
-	add LEN, INP		/* rewind input pointer */
-	add $16, LEN		/* # bytes in final block */
-	movups (INP), IN1
-
-	mov T1, IVP
-	add $32, IVP
-	add LEN, T1
-	sub LEN, IVP
-	add OUTP, LEN
-
-	movups (T1), %xmm4
-	movaps STATE, IN2
-	pshufb %xmm4, STATE
-	movups STATE, (LEN)
-
-	movups (IVP), %xmm0
-	pshufb %xmm0, IN1
-	pblendvb IN2, IN1
-	movaps IN1, STATE
-
-	pxor STATE4, STATE
-	call _aesni_dec1
-	pxor STATE4, STATE
-
-	movups STATE, (OUTP)
-	jmp .Lxts_dec_ret
+	_aesni_xts_crypt	0
 SYM_FUNC_END(aesni_xts_dec)

base-commit: 751fb2528c12ef64d1e863efb196cdc968b384f6
-- 
2.44.0


             reply	other threads:[~2024-04-13  0:10 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-13  0:09 Eric Biggers [this message]
2024-04-13  8:54 ` [PATCH] crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec() Ard Biesheuvel
2024-04-19 11:04 ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240413000947.67988-1-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=chang.seok.bae@intel.com \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.