linux-crypto.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	"Chang S . Bae" <chang.seok.bae@intel.com>
Subject: [PATCH 2/3] crypto: x86/aes-xts - eliminate a few more instructions
Date: Fri, 12 Apr 2024 20:17:27 -0700	[thread overview]
Message-ID: <20240413031728.159495-3-ebiggers@kernel.org> (raw)
In-Reply-To: <20240413031728.159495-1-ebiggers@kernel.org>

From: Eric Biggers <ebiggers@google.com>

- For conditionally subtracting 16 from LEN when decrypting a message
  whose length isn't a multiple of 16, use the cmovnz instruction.

- Fold the addition of 4*VL to LEN into the sub of VL or 16 from LEN.

- Remove an unnecessary test instruction.

This results in slightly shorter code, both source and binary.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/aes-xts-avx-x86_64.S | 39 ++++++++++------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
index f5e7ab739105..802d3b90d337 100644
--- a/arch/x86/crypto/aes-xts-avx-x86_64.S
+++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
@@ -557,24 +557,24 @@
 .endm
 
 .macro	_aes_xts_crypt	enc
 	_define_aliases
 
-	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
-	movl		480(KEY), KEYLEN
-
 .if !\enc
 	// When decrypting a message whose length isn't a multiple of the AES
 	// block length, exclude the last full block from the main loop by
 	// subtracting 16 from LEN.  This is needed because ciphertext stealing
 	// decryption uses the last two tweaks in reverse order.  We'll handle
 	// the last full block and the partial block specially at the end.
+	lea		-16(LEN), %rax
 	test		$15, LEN
-	jnz		.Lneed_cts_dec\@
-.Lxts_init\@:
+	cmovnz		%rax, LEN
 .endif
 
+	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
+	movl		480(KEY), KEYLEN
+
 	// Setup the pointer to the round keys and cache as many as possible.
 	_setup_round_keys	\enc
 
 	// Compute the first set of tweaks TWEAK[0-3].
 	_compute_first_set_of_tweaks
@@ -659,15 +659,14 @@
 	vzeroupper
 .endif
 	RET
 
 .Lhandle_remainder\@:
-	add		$4*VL, LEN	// Undo the extra sub from earlier.
 
 	// En/decrypt any remaining full blocks, one vector at a time.
 .if VL > 16
-	sub		$VL, LEN
+	add		$3*VL, LEN	// Undo extra sub of 4*VL, then sub VL.
 	jl		.Lvec_at_a_time_done\@
 .Lvec_at_a_time\@:
 	_vmovdqu	(SRC), V0
 	_aes_crypt	\enc, , TWEAK0, V0
 	_vmovdqu	V0, (DST)
@@ -675,13 +674,13 @@
 	add		$VL, SRC
 	add		$VL, DST
 	sub		$VL, LEN
 	jge		.Lvec_at_a_time\@
 .Lvec_at_a_time_done\@:
-	add		$VL-16, LEN	// Undo the extra sub from earlier.
+	add		$VL-16, LEN	// Undo extra sub of VL, then sub 16.
 .else
-	sub		$16, LEN
+	add		$4*VL-16, LEN	// Undo extra sub of 4*VL, then sub 16.
 .endif
 
 	// En/decrypt any remaining full blocks, one at a time.
 	jl		.Lblock_at_a_time_done\@
 .Lblock_at_a_time\@:
@@ -692,28 +691,16 @@
 	add		$16, SRC
 	add		$16, DST
 	sub		$16, LEN
 	jge		.Lblock_at_a_time\@
 .Lblock_at_a_time_done\@:
-	add		$16, LEN	// Undo the extra sub from earlier.
-
-.Lfull_blocks_done\@:
-	// Now 0 <= LEN <= 15.  If LEN is nonzero, do ciphertext stealing to
-	// process the last 16 + LEN bytes.  If LEN is zero, we're done.
-	test		LEN, LEN
-	jnz		.Lcts\@
-	jmp		.Ldone\@
-
-.if !\enc
-.Lneed_cts_dec\@:
-	sub		$16, LEN
-	jmp		.Lxts_init\@
-.endif
+	add		$16, LEN	// Undo the extra sub of 16.
+	// Now 0 <= LEN <= 15.  If LEN is zero, we're done.
+	jz		.Ldone\@
 
-.Lcts\@:
-	// Do ciphertext stealing (CTS) to en/decrypt the last full block and
-	// the partial block.  TWEAK0_XMM contains the next tweak.
+	// Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN.
+	// Do ciphertext stealing to process the last 16 + LEN bytes.
 
 .if \enc
 	// If encrypting, the main loop already encrypted the last full block to
 	// create the CTS intermediate ciphertext.  Prepare for the rest of CTS
 	// by rewinding the pointers and loading the intermediate ciphertext.
-- 
2.44.0


  parent reply	other threads:[~2024-04-13  3:21 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-13  3:17 [PATCH 0/3] crypto: x86/aes-xts - additional tuning Eric Biggers
2024-04-13  3:17 ` [PATCH 1/3] crypto: x86/aes-xts - handle AES-128 and AES-192 more efficiently Eric Biggers
2024-04-13  3:17 ` Eric Biggers [this message]
2024-04-13  3:17 ` [PATCH 3/3] crypto: x86/aes-xts - optimize size of instructions operating on lengths Eric Biggers
2024-04-15 12:11 ` [PATCH 0/3] crypto: x86/aes-xts - additional tuning Ard Biesheuvel
2024-04-19 11:04 ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240413031728.159495-3-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=chang.seok.bae@intel.com \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).