All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org, x86@kernel.org
Cc: linux-kernel@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	Andy Lutomirski <luto@kernel.org>,
	"Chang S . Bae" <chang.seok.bae@intel.com>
Subject: [PATCH 5/6] crypto: x86/aes-xts - wire up VAES + AVX10/256 implementation
Date: Tue, 26 Mar 2024 01:03:03 -0700	[thread overview]
Message-ID: <20240326080305.402382-6-ebiggers@kernel.org> (raw)
In-Reply-To: <20240326080305.402382-1-ebiggers@kernel.org>

From: Eric Biggers <ebiggers@google.com>

Add an AES-XTS implementation "xts-aes-vaes-avx10_256" for x86_64 CPUs
with the VAES, VPCLMULQDQ, and either AVX10/256 or AVX512BW + AVX512VL
extensions.  This implementation avoids using zmm registers, instead
using ymm registers to operate on two AES blocks at a time.  The
assembly code is instantiated using a macro so that most of the source
code is shared with other implementations.

This is the optimal implementation on CPUs that support VAES and AVX512
but where the zmm registers should not be used due to downclocking
effects, for example Intel's Ice Lake.  It should also be the optimal
implementation on future CPUs that support AVX10/256 but not AVX10/512.

The performance is slightly better than that of xts-aes-vaes-avx2, which
uses the same vector length, due to factors such as being able to use
ymm16-ymm31 to cache the AES round keys.  For example, on Ice Lake, the
throughput of decrypting 4096-byte messages with AES-256-XTS is 5.8%
higher with xts-aes-vaes-avx10_256 than with xts-aes-vaes-avx2.

Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 arch/x86/crypto/aes-xts-avx-x86_64.S |  9 +++++++++
 arch/x86/crypto/aesni-intel_glue.c   | 16 ++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/arch/x86/crypto/aes-xts-avx-x86_64.S b/arch/x86/crypto/aes-xts-avx-x86_64.S
index 87ae2139b7ca..c868b9af443b 100644
--- a/arch/x86/crypto/aes-xts-avx-x86_64.S
+++ b/arch/x86/crypto/aes-xts-avx-x86_64.S
@@ -773,6 +773,15 @@ SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx2)
 	aes_xts_crypt	1
 SYM_FUNC_END(aes_xts_encrypt_vaes_avx2)
 SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx2)
 	aes_xts_crypt	0
 SYM_FUNC_END(aes_xts_decrypt_vaes_avx2)
+
+.set	VL, 32
+.set	USE_AVX10, 1
+SYM_TYPED_FUNC_START(aes_xts_encrypt_vaes_avx10_256)
+	aes_xts_crypt	1
+SYM_FUNC_END(aes_xts_encrypt_vaes_avx10_256)
+SYM_TYPED_FUNC_START(aes_xts_decrypt_vaes_avx10_256)
+	aes_xts_crypt	0
+SYM_FUNC_END(aes_xts_decrypt_vaes_avx10_256)
 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index d958aa073c14..ac45e0b952b7 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1295,10 +1295,11 @@ static struct skcipher_alg aes_xts_alg_##suffix = {			       \
 static struct simd_skcipher_alg *aes_xts_simdalg_##suffix
 
 DEFINE_XTS_ALG(aesni_avx, "xts-aes-aesni-avx", 500);
 #if defined(CONFIG_AS_VAES) && defined(CONFIG_AS_VPCLMULQDQ)
 DEFINE_XTS_ALG(vaes_avx2, "xts-aes-vaes-avx2", 600);
+DEFINE_XTS_ALG(vaes_avx10_256, "xts-aes-vaes-avx10_256", 700);
 #endif
 
 static int __init register_xts_algs(void)
 {
 	int err;
@@ -1318,10 +1319,22 @@ static int __init register_xts_algs(void)
 		return 0;
 	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx2, 1,
 					     &aes_xts_simdalg_vaes_avx2);
 	if (err)
 		return err;
+
+	if (!boot_cpu_has(X86_FEATURE_AVX512BW) ||
+	    !boot_cpu_has(X86_FEATURE_AVX512VL) ||
+	    !boot_cpu_has(X86_FEATURE_BMI2) ||
+	    !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
+			       XFEATURE_MASK_AVX512, NULL))
+		return 0;
+
+	err = simd_register_skciphers_compat(&aes_xts_alg_vaes_avx10_256, 1,
+					     &aes_xts_simdalg_vaes_avx10_256);
+	if (err)
+		return err;
 #endif
 	return 0;
 }
 
 static void unregister_xts_algs(void)
@@ -1330,10 +1343,13 @@ static void unregister_xts_algs(void)
 		simd_unregister_skciphers(&aes_xts_alg_aesni_avx, 1,
 					  &aes_xts_simdalg_aesni_avx);
 	if (aes_xts_simdalg_vaes_avx2)
 		simd_unregister_skciphers(&aes_xts_alg_vaes_avx2, 1,
 					  &aes_xts_simdalg_vaes_avx2);
+	if (aes_xts_simdalg_vaes_avx10_256)
+		simd_unregister_skciphers(&aes_xts_alg_vaes_avx10_256, 1,
+					  &aes_xts_simdalg_vaes_avx10_256);
 }
 #else
 static int __init register_xts_algs(void)
 {
 	return 0;
-- 
2.44.0


  parent reply	other threads:[~2024-03-26  8:06 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-26  8:02 [PATCH 0/6] Faster AES-XTS on modern x86_64 CPUs Eric Biggers
2024-03-26  8:02 ` [PATCH 1/6] x86: add kconfig symbols for assembler VAES and VPCLMULQDQ support Eric Biggers
2024-03-26  8:10   ` Ingo Molnar
2024-03-26  8:18     ` Eric Biggers
2024-03-26  8:28       ` Ingo Molnar
2024-03-26  8:03 ` [PATCH 2/6] crypto: x86/aes-xts - add AES-XTS assembly macro for modern CPUs Eric Biggers
2024-03-26  8:03 ` [PATCH 3/6] crypto: x86/aes-xts - wire up AESNI + AVX implementation Eric Biggers
2024-03-26  8:03 ` [PATCH 4/6] crypto: x86/aes-xts - wire up VAES + AVX2 implementation Eric Biggers
2024-03-26  8:03 ` Eric Biggers [this message]
2024-03-26  8:03 ` [PATCH 6/6] crypto: x86/aes-xts - wire up VAES + AVX10/512 implementation Eric Biggers
2024-03-26  8:51 ` [PATCH 0/6] Faster AES-XTS on modern x86_64 CPUs Ard Biesheuvel
2024-03-26 16:47   ` Eric Biggers
2024-04-03  8:12     ` David Laight
2024-04-04  1:35       ` Eric Biggers
2024-04-04  7:53         ` David Laight
2024-04-05 19:19           ` Eric Biggers
2024-04-08  7:41             ` David Laight
2024-04-08 12:31               ` Eric Biggers
2024-04-05  7:58 ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240326080305.402382-6-ebiggers@kernel.org \
    --to=ebiggers@kernel.org \
    --cc=ardb@kernel.org \
    --cc=chang.seok.bae@intel.com \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.