From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Stefan Kanthak <stefan.kanthak@nexgo.de>
Subject: [PATCH v2 3/4] crypto: x86/sha256-ni - optimize code size
Date: Thu, 11 Apr 2024 09:23:58 -0700 [thread overview]
Message-ID: <20240411162359.39073-4-ebiggers@kernel.org> (raw)
In-Reply-To: <20240411162359.39073-1-ebiggers@kernel.org>
From: Eric Biggers <ebiggers@google.com>
- Load the SHA-256 round constants relative to a pointer that points
  into the middle of the constants rather than to the beginning.  Since
  x86 instructions use signed offsets, this decreases the instruction
  length required to access some of the later round constants.
- Use punpcklqdq or punpckhqdq instead of longer instructions such as
  pshufd, pblendw, and palignr.  This doesn't harm performance.
The end result is that sha256_ni_transform shrinks from 839 bytes to 791
bytes, with no loss in performance.
Suggested-by: Stefan Kanthak <stefan.kanthak@nexgo.de>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/sha256_ni_asm.S | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index b7e7001dafdf..ffc9f1c75c15 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -82,19 +82,19 @@
pshufb SHUF_MASK, MSG
movdqa MSG, \m0
.else
movdqa \m0, MSG
.endif
- paddd \i*4(SHA256CONSTANTS), MSG
+ paddd (\i-32)*4(SHA256CONSTANTS), MSG
sha256rnds2 STATE0, STATE1
.if \i >= 12 && \i < 60
movdqa \m0, TMP
palignr $4, \m3, TMP
paddd TMP, \m1
sha256msg2 \m0, \m1
.endif
- pshufd $0x0E, MSG, MSG
+ punpckhqdq MSG, MSG
sha256rnds2 STATE1, STATE0
.if \i >= 4 && \i < 52
sha256msg1 \m0, \m3
.endif
.endm
@@ -126,21 +126,21 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
/*
* load initial hash values
* Need to reorder these appropriately
* DCBA, HGFE -> ABEF, CDGH
*/
- movdqu 0*16(DIGEST_PTR), STATE0
- movdqu 1*16(DIGEST_PTR), STATE1
+ movdqu 0*16(DIGEST_PTR), STATE0 /* DCBA */
+ movdqu 1*16(DIGEST_PTR), STATE1 /* HGFE */
- pshufd $0xB1, STATE0, STATE0 /* CDAB */
- pshufd $0x1B, STATE1, STATE1 /* EFGH */
movdqa STATE0, TMP
- palignr $8, STATE1, STATE0 /* ABEF */
- pblendw $0xF0, TMP, STATE1 /* CDGH */
+ punpcklqdq STATE1, STATE0 /* FEBA */
+ punpckhqdq TMP, STATE1 /* DCHG */
+ pshufd $0x1B, STATE0, STATE0 /* ABEF */
+ pshufd $0xB1, STATE1, STATE1 /* CDGH */
movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK
- lea K256(%rip), SHA256CONSTANTS
+ lea K256+32*4(%rip), SHA256CONSTANTS
.Lloop0:
/* Save hash values for addition after rounds */
movdqa STATE0, ABEF_SAVE
movdqa STATE1, CDGH_SAVE
@@ -160,18 +160,18 @@ SYM_TYPED_FUNC_START(sha256_ni_transform)
add $64, DATA_PTR
cmp NUM_BLKS, DATA_PTR
jne .Lloop0
/* Write hash values back in the correct order */
- pshufd $0x1B, STATE0, STATE0 /* FEBA */
- pshufd $0xB1, STATE1, STATE1 /* DCHG */
movdqa STATE0, TMP
- pblendw $0xF0, STATE1, STATE0 /* DCBA */
- palignr $8, TMP, STATE1 /* HGFE */
+ punpcklqdq STATE1, STATE0 /* GHEF */
+ punpckhqdq TMP, STATE1 /* ABCD */
+ pshufd $0xB1, STATE0, STATE0 /* HGFE */
+ pshufd $0x1B, STATE1, STATE1 /* DCBA */
- movdqu STATE0, 0*16(DIGEST_PTR)
- movdqu STATE1, 1*16(DIGEST_PTR)
+ movdqu STATE1, 0*16(DIGEST_PTR)
+ movdqu STATE0, 1*16(DIGEST_PTR)
.Ldone_hash:
RET
SYM_FUNC_END(sha256_ni_transform)
--
2.44.0
next prev parent reply other threads:[~2024-04-11 16:25 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-11 16:23 [PATCH v2 0/4] crypto: x86/sha256-ni - cleanup and optimization Eric Biggers
2024-04-11 16:23 ` [PATCH v2 1/4] crypto: x86/sha256-ni - convert to use rounds macros Eric Biggers
2024-04-11 16:23 ` [PATCH v2 2/4] crypto: x86/sha256-ni - rename some register aliases Eric Biggers
2024-04-11 16:23 ` Eric Biggers [this message]
2024-04-11 16:23 ` [PATCH v2 4/4] crypto: x86/sha256-ni - simplify do_4rounds Eric Biggers
2024-04-15 20:41 ` Stefan Kanthak
2024-04-15 21:21 ` Eric Biggers
2024-04-15 22:04 ` Stefan Kanthak
2024-04-15 22:46 ` Eric Biggers
2024-04-16 0:17 ` Stefan Kanthak
2024-04-19 11:00 ` [PATCH v2 0/4] crypto: x86/sha256-ni - cleanup and optimization Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240411162359.39073-4-ebiggers@kernel.org \
--to=ebiggers@kernel.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=stefan.kanthak@nexgo.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.