All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	"Chang S . Bae" <chang.seok.bae@intel.com>
Subject: [PATCH 2/3] crypto: x86/aes-xts - eliminate a few more instructions
Date: Fri, 12 Apr 2024 20:17:27 -0700	[thread overview]
Message-ID: <20240413031728.159495-3-ebiggers@kernel.org> (raw)
In-Reply-To: <20240413031728.159495-1-ebiggers@kernel.org>

From: Eric Biggers <ebiggers@google.com>

- For conditionally subtracting 16 from LEN when decrypting a message
  whose length isn't a multiple of 16, use the cmovnz instruction.

- Fold the addition of 4*VL to LEN into the sub of VL or 16 from LEN.

- Remove an unnecessary test instruction.

This results in slightly shorter code, both source and binary.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/aes-xts-avx-x86_64.S | 39 ++++++++++------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
index f5e7ab739105..802d3b90d337 100644
--- a/arch/x86/crypto/aes-xts-avx-x86_64.S
+++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
@@ -557,24 +557,24 @@
 .endm
 
 .macro	_aes_xts_crypt	enc
 	_define_aliases
 
-	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
-	movl		480(KEY), KEYLEN
-
 .if !\enc
 	// When decrypting a message whose length isn't a multiple of the AES
 	// block length, exclude the last full block from the main loop by
 	// subtracting 16 from LEN.  This is needed because ciphertext stealing
 	// decryption uses the last two tweaks in reverse order.  We'll handle
 	// the last full block and the partial block specially at the end.
+	lea		-16(LEN), %rax
 	test		$15, LEN
-	jnz		.Lneed_cts_dec\@
-.Lxts_init\@:
+	cmovnz		%rax, LEN
 .endif
 
+	// Load the AES key length: 16 (AES-128), 24 (AES-192), or 32 (AES-256).
+	movl		480(KEY), KEYLEN
+
 	// Setup the pointer to the round keys and cache as many as possible.
 	_setup_round_keys	\enc
 
 	// Compute the first set of tweaks TWEAK[0-3].
 	_compute_first_set_of_tweaks
@@ -659,15 +659,14 @@
 	vzeroupper
 .endif
 	RET
 
 .Lhandle_remainder\@:
-	add		$4*VL, LEN	// Undo the extra sub from earlier.
 
 	// En/decrypt any remaining full blocks, one vector at a time.
 .if VL > 16
-	sub		$VL, LEN
+	add		$3*VL, LEN	// Undo extra sub of 4*VL, then sub VL.
 	jl		.Lvec_at_a_time_done\@
 .Lvec_at_a_time\@:
 	_vmovdqu	(SRC), V0
 	_aes_crypt	\enc, , TWEAK0, V0
 	_vmovdqu	V0, (DST)
@@ -675,13 +674,13 @@
 	add		$VL, SRC
 	add		$VL, DST
 	sub		$VL, LEN
 	jge		.Lvec_at_a_time\@
 .Lvec_at_a_time_done\@:
-	add		$VL-16, LEN	// Undo the extra sub from earlier.
+	add		$VL-16, LEN	// Undo extra sub of VL, then sub 16.
 .else
-	sub		$16, LEN
+	add		$4*VL-16, LEN	// Undo extra sub of 4*VL, then sub 16.
 .endif
 
 	// En/decrypt any remaining full blocks, one at a time.
 	jl		.Lblock_at_a_time_done\@
 .Lblock_at_a_time\@:
@@ -692,28 +691,16 @@
 	add		$16, SRC
 	add		$16, DST
 	sub		$16, LEN
 	jge		.Lblock_at_a_time\@
 .Lblock_at_a_time_done\@:
-	add		$16, LEN	// Undo the extra sub from earlier.
-
-.Lfull_blocks_done\@:
-	// Now 0 <= LEN <= 15.  If LEN is nonzero, do ciphertext stealing to
-	// process the last 16 + LEN bytes.  If LEN is zero, we're done.
-	test		LEN, LEN
-	jnz		.Lcts\@
-	jmp		.Ldone\@
-
-.if !\enc
-.Lneed_cts_dec\@:
-	sub		$16, LEN
-	jmp		.Lxts_init\@
-.endif
+	add		$16, LEN	// Undo the extra sub of 16.
+	// Now 0 <= LEN <= 15.  If LEN is zero, we're done.
+	jz		.Ldone\@
 
-.Lcts\@:
-	// Do ciphertext stealing (CTS) to en/decrypt the last full block and
-	// the partial block.  TWEAK0_XMM contains the next tweak.
+	// Otherwise 1 <= LEN <= 15, but the real remaining length is 16 + LEN.
+	// Do ciphertext stealing to process the last 16 + LEN bytes.
 
 .if \enc
 	// If encrypting, the main loop already encrypted the last full block to
 	// create the CTS intermediate ciphertext.  Prepare for the rest of CTS
 	// by rewinding the pointers and loading the intermediate ciphertext.
-- 
2.44.0


  parent reply	other threads:[~2024-04-13  3:21 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-13  3:17 [PATCH 0/3] crypto: x86/aes-xts - additional tuning Eric Biggers
2024-04-13  3:17 ` [PATCH 1/3] crypto: x86/aes-xts - handle AES-128 and AES-192 more efficiently Eric Biggers
2024-04-13  3:17 ` Eric Biggers [this message]
2024-04-13  3:17 ` [PATCH 3/3] crypto: x86/aes-xts - optimize size of instructions operating on lengths Eric Biggers
2024-04-15 12:11 ` [PATCH 0/3] crypto: x86/aes-xts - additional tuning Ard Biesheuvel
2024-04-19 11:04 ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240413031728.159495-3-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=chang.seok.bae@intel.com \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.