From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
"Chang S . Bae" <chang.seok.bae@intel.com>
Subject: [PATCH] crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec()
Date: Fri, 12 Apr 2024 17:09:47 -0700 [thread overview]
Message-ID: <20240413000947.67988-1-ebiggers@kernel.org> (raw)
From: Eric Biggers <ebiggers@google.com>
Since aesni_xts_enc() and aesni_xts_dec() are very similar, generate
them from a macro that's passed an argument enc=1 or enc=0. This
reduces the length of aesni-intel_asm.S by 112 lines while still
producing the exact same object file in both 32-bit and 64-bit mode.
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/aesni-intel_asm.S | 270 +++++++++---------------------
1 file changed, 79 insertions(+), 191 deletions(-)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 1cb55eea2efa..3a3e46188dec 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -2823,32 +2823,28 @@ SYM_FUNC_END(aesni_ctr_enc)
.Lgf128mul_x_ble_mask:
.octa 0x00000000000000010000000000000087
.previous
/*
- * _aesni_gf128mul_x_ble: internal ABI
- * Multiply in GF(2^128) for XTS IVs
+ * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
* input:
* IV: current IV
* GF128MUL_MASK == mask with 0x87 and 0x01
* output:
* IV: next IV
* changed:
- * CTR: == temporary value
+ * KEY: == temporary value
*/
-#define _aesni_gf128mul_x_ble() \
- pshufd $0x13, IV, KEY; \
- paddq IV, IV; \
- psrad $31, KEY; \
- pand GF128MUL_MASK, KEY; \
- pxor KEY, IV;
+.macro _aesni_gf128mul_x_ble
+ pshufd $0x13, IV, KEY
+ paddq IV, IV
+ psrad $31, KEY
+ pand GF128MUL_MASK, KEY
+ pxor KEY, IV
+.endm
-/*
- * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
- * const u8 *src, unsigned int len, le128 *iv)
- */
-SYM_FUNC_START(aesni_xts_enc)
+.macro _aesni_xts_crypt enc
FRAME_BEGIN
#ifndef __x86_64__
pushl IVP
pushl LEN
pushl KEYP
@@ -2863,39 +2859,50 @@ SYM_FUNC_START(aesni_xts_enc)
movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
#endif
movups (IVP), IV
mov 480(KEYP), KLEN
+.if !\enc
+ add $240, KEYP
+
+ test $15, LEN
+ jz .Lxts_loop4\@
+ sub $16, LEN
+.endif
-.Lxts_enc_loop4:
+.Lxts_loop4\@:
sub $64, LEN
- jl .Lxts_enc_1x
+ jl .Lxts_1x\@
movdqa IV, STATE1
movdqu 0x00(INP), IN
pxor IN, STATE1
movdqu IV, 0x00(OUTP)
- _aesni_gf128mul_x_ble()
+ _aesni_gf128mul_x_ble
movdqa IV, STATE2
movdqu 0x10(INP), IN
pxor IN, STATE2
movdqu IV, 0x10(OUTP)
- _aesni_gf128mul_x_ble()
+ _aesni_gf128mul_x_ble
movdqa IV, STATE3
movdqu 0x20(INP), IN
pxor IN, STATE3
movdqu IV, 0x20(OUTP)
- _aesni_gf128mul_x_ble()
+ _aesni_gf128mul_x_ble
movdqa IV, STATE4
movdqu 0x30(INP), IN
pxor IN, STATE4
movdqu IV, 0x30(OUTP)
+.if \enc
call _aesni_enc4
+.else
+ call _aesni_dec4
+.endif
movdqu 0x00(OUTP), IN
pxor IN, STATE1
movdqu STATE1, 0x00(OUTP)
@@ -2909,63 +2916,84 @@ SYM_FUNC_START(aesni_xts_enc)
movdqu 0x30(OUTP), IN
pxor IN, STATE4
movdqu STATE4, 0x30(OUTP)
- _aesni_gf128mul_x_ble()
+ _aesni_gf128mul_x_ble
add $64, INP
add $64, OUTP
test LEN, LEN
- jnz .Lxts_enc_loop4
+ jnz .Lxts_loop4\@
-.Lxts_enc_ret_iv:
+.Lxts_ret_iv\@:
movups IV, (IVP)
-.Lxts_enc_ret:
+.Lxts_ret\@:
#ifndef __x86_64__
popl KLEN
popl KEYP
popl LEN
popl IVP
#endif
FRAME_END
RET
-.Lxts_enc_1x:
+.Lxts_1x\@:
add $64, LEN
- jz .Lxts_enc_ret_iv
+ jz .Lxts_ret_iv\@
+.if \enc
sub $16, LEN
- jl .Lxts_enc_cts4
+ jl .Lxts_cts4\@
+.endif
-.Lxts_enc_loop1:
+.Lxts_loop1\@:
movdqu (INP), STATE
+.if \enc
pxor IV, STATE
call _aesni_enc1
+.else
+ add $16, INP
+ sub $16, LEN
+ jl .Lxts_cts1\@
pxor IV, STATE
- _aesni_gf128mul_x_ble()
+ call _aesni_dec1
+.endif
+ pxor IV, STATE
+ _aesni_gf128mul_x_ble
test LEN, LEN
- jz .Lxts_enc_out
+ jz .Lxts_out\@
+.if \enc
add $16, INP
sub $16, LEN
- jl .Lxts_enc_cts1
+ jl .Lxts_cts1\@
+.endif
movdqu STATE, (OUTP)
add $16, OUTP
- jmp .Lxts_enc_loop1
+ jmp .Lxts_loop1\@
-.Lxts_enc_out:
+.Lxts_out\@:
movdqu STATE, (OUTP)
- jmp .Lxts_enc_ret_iv
+ jmp .Lxts_ret_iv\@
-.Lxts_enc_cts4:
+.if \enc
+.Lxts_cts4\@:
movdqa STATE4, STATE
sub $16, OUTP
+.Lxts_cts1\@:
+.else
+.Lxts_cts1\@:
+ movdqa IV, STATE4
+ _aesni_gf128mul_x_ble
-.Lxts_enc_cts1:
+ pxor IV, STATE
+ call _aesni_dec1
+ pxor IV, STATE
+.endif
#ifndef __x86_64__
lea .Lcts_permute_table, T1
#else
lea .Lcts_permute_table(%rip), T1
#endif
@@ -2987,174 +3015,34 @@ SYM_FUNC_START(aesni_xts_enc)
movups (IVP), %xmm0
pshufb %xmm0, IN1
pblendvb IN2, IN1
movaps IN1, STATE
+.if \enc
pxor IV, STATE
call _aesni_enc1
pxor IV, STATE
+.else
+ pxor STATE4, STATE
+ call _aesni_dec1
+ pxor STATE4, STATE
+.endif
movups STATE, (OUTP)
- jmp .Lxts_enc_ret
+ jmp .Lxts_ret\@
+.endm
+
+/*
+ * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
+ * const u8 *src, unsigned int len, le128 *iv)
+ */
+SYM_FUNC_START(aesni_xts_enc)
+ _aesni_xts_crypt 1
SYM_FUNC_END(aesni_xts_enc)
/*
* void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
* const u8 *src, unsigned int len, le128 *iv)
*/
SYM_FUNC_START(aesni_xts_dec)
- FRAME_BEGIN
-#ifndef __x86_64__
- pushl IVP
- pushl LEN
- pushl KEYP
- pushl KLEN
- movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
- movl (FRAME_OFFSET+24)(%esp), OUTP # dst
- movl (FRAME_OFFSET+28)(%esp), INP # src
- movl (FRAME_OFFSET+32)(%esp), LEN # len
- movl (FRAME_OFFSET+36)(%esp), IVP # iv
- movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
-#else
- movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
-#endif
- movups (IVP), IV
-
- mov 480(KEYP), KLEN
- add $240, KEYP
-
- test $15, LEN
- jz .Lxts_dec_loop4
- sub $16, LEN
-
-.Lxts_dec_loop4:
- sub $64, LEN
- jl .Lxts_dec_1x
-
- movdqa IV, STATE1
- movdqu 0x00(INP), IN
- pxor IN, STATE1
- movdqu IV, 0x00(OUTP)
-
- _aesni_gf128mul_x_ble()
- movdqa IV, STATE2
- movdqu 0x10(INP), IN
- pxor IN, STATE2
- movdqu IV, 0x10(OUTP)
-
- _aesni_gf128mul_x_ble()
- movdqa IV, STATE3
- movdqu 0x20(INP), IN
- pxor IN, STATE3
- movdqu IV, 0x20(OUTP)
-
- _aesni_gf128mul_x_ble()
- movdqa IV, STATE4
- movdqu 0x30(INP), IN
- pxor IN, STATE4
- movdqu IV, 0x30(OUTP)
-
- call _aesni_dec4
-
- movdqu 0x00(OUTP), IN
- pxor IN, STATE1
- movdqu STATE1, 0x00(OUTP)
-
- movdqu 0x10(OUTP), IN
- pxor IN, STATE2
- movdqu STATE2, 0x10(OUTP)
-
- movdqu 0x20(OUTP), IN
- pxor IN, STATE3
- movdqu STATE3, 0x20(OUTP)
-
- movdqu 0x30(OUTP), IN
- pxor IN, STATE4
- movdqu STATE4, 0x30(OUTP)
-
- _aesni_gf128mul_x_ble()
-
- add $64, INP
- add $64, OUTP
- test LEN, LEN
- jnz .Lxts_dec_loop4
-
-.Lxts_dec_ret_iv:
- movups IV, (IVP)
-
-.Lxts_dec_ret:
-#ifndef __x86_64__
- popl KLEN
- popl KEYP
- popl LEN
- popl IVP
-#endif
- FRAME_END
- RET
-
-.Lxts_dec_1x:
- add $64, LEN
- jz .Lxts_dec_ret_iv
-
-.Lxts_dec_loop1:
- movdqu (INP), STATE
-
- add $16, INP
- sub $16, LEN
- jl .Lxts_dec_cts1
-
- pxor IV, STATE
- call _aesni_dec1
- pxor IV, STATE
- _aesni_gf128mul_x_ble()
-
- test LEN, LEN
- jz .Lxts_dec_out
-
- movdqu STATE, (OUTP)
- add $16, OUTP
- jmp .Lxts_dec_loop1
-
-.Lxts_dec_out:
- movdqu STATE, (OUTP)
- jmp .Lxts_dec_ret_iv
-
-.Lxts_dec_cts1:
- movdqa IV, STATE4
- _aesni_gf128mul_x_ble()
-
- pxor IV, STATE
- call _aesni_dec1
- pxor IV, STATE
-
-#ifndef __x86_64__
- lea .Lcts_permute_table, T1
-#else
- lea .Lcts_permute_table(%rip), T1
-#endif
- add LEN, INP /* rewind input pointer */
- add $16, LEN /* # bytes in final block */
- movups (INP), IN1
-
- mov T1, IVP
- add $32, IVP
- add LEN, T1
- sub LEN, IVP
- add OUTP, LEN
-
- movups (T1), %xmm4
- movaps STATE, IN2
- pshufb %xmm4, STATE
- movups STATE, (LEN)
-
- movups (IVP), %xmm0
- pshufb %xmm0, IN1
- pblendvb IN2, IN1
- movaps IN1, STATE
-
- pxor STATE4, STATE
- call _aesni_dec1
- pxor STATE4, STATE
-
- movups STATE, (OUTP)
- jmp .Lxts_dec_ret
+ _aesni_xts_crypt 0
SYM_FUNC_END(aesni_xts_dec)
base-commit: 751fb2528c12ef64d1e863efb196cdc968b384f6
--
2.44.0
next reply other threads:[~2024-04-13 0:10 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-13 0:09 Eric Biggers [this message]
2024-04-13 8:54 ` [PATCH] crypto: x86/aesni-xts - deduplicate aesni_xts_enc() and aesni_xts_dec() Ard Biesheuvel
2024-04-19 11:04 ` Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240413000947.67988-1-ebiggers@kernel.org \
--to=ebiggers@kernel.org \
--cc=chang.seok.bae@intel.com \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.