Linux-Crypto Archive on lore.kernel.org
 help / color / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: Ard Biesheuvel <ardb@kernel.org>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	David Miller <davem@davemloft.net>,
	"Jason A . Donenfeld" <Jason@zx2c4.com>,
	Samuel Neves <sneves@dei.uc.pt>, Arnd Bergmann <arnd@arndb.de>,
	Eric Biggers <ebiggers@google.com>,
	Andy Lutomirski <luto@kernel.org>,
	Martin Willi <martin@strongswan.org>,
	Rene van Dorst <opensource@vdorst.com>,
	David Sterba <dsterba@suse.com>
Subject: [PATCH v5 04/34] crypto: x86/chacha - expose SIMD ChaCha routine as library function
Date: Fri,  8 Nov 2019 13:22:10 +0100
Message-ID: <20191108122240.28479-5-ardb@kernel.org> (raw)
In-Reply-To: <20191108122240.28479-1-ardb@kernel.org>

Wire the existing x86 SIMD ChaCha code into the new ChaCha library
interface, so that users of the library interface will get the
accelerated version when available.

Given that calls into the library API will always go through the
routines in this module if it is enabled, switch to static keys
to select the optimal implementation available (which may be none
at all, in which case we defer to the generic implementation for
all invocations).

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/crypto/chacha_glue.c | 91 ++++++++++++++------
 crypto/Kconfig                |  1 +
 include/crypto/chacha.h       |  6 ++
 3 files changed, 73 insertions(+), 25 deletions(-)

diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 0aabb382edce..b391e13a9e41 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
 asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
 					unsigned int len, int nrounds);
 asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
-#ifdef CONFIG_AS_AVX2
+
 asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
 				       unsigned int len, int nrounds);
 asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
 				       unsigned int len, int nrounds);
 asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
 				       unsigned int len, int nrounds);
-static bool chacha_use_avx2;
-#ifdef CONFIG_AS_AVX512
+
 asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 					   unsigned int len, int nrounds);
 asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 					   unsigned int len, int nrounds);
 asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 					   unsigned int len, int nrounds);
-static bool chacha_use_avx512vl;
-#endif
-#endif
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
 
 static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
 {
@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
 static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 			  unsigned int bytes, int nrounds)
 {
-#ifdef CONFIG_AS_AVX2
-#ifdef CONFIG_AS_AVX512
-	if (chacha_use_avx512vl) {
+	if (IS_ENABLED(CONFIG_AS_AVX512) &&
+	    static_branch_likely(&chacha_use_avx512vl)) {
 		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
 			chacha_8block_xor_avx512vl(state, dst, src, bytes,
 						   nrounds);
@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 			return;
 		}
 	}
-#endif
-	if (chacha_use_avx2) {
+
+	if (IS_ENABLED(CONFIG_AS_AVX2) &&
+	    static_branch_likely(&chacha_use_avx2)) {
 		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
 			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
 			bytes -= CHACHA_BLOCK_SIZE * 8;
@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 			return;
 		}
 	}
-#endif
+
 	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
 		chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
 		bytes -= CHACHA_BLOCK_SIZE * 4;
@@ -123,6 +123,43 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 	}
 }
 
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+	state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
+		hchacha_block_generic(state, stream, nrounds);
+	} else {
+		kernel_fpu_begin();
+		hchacha_block_ssse3(state, stream, nrounds);
+		kernel_fpu_end();
+	}
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+	state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+	chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+		       int nrounds)
+{
+	state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
+	    bytes <= CHACHA_BLOCK_SIZE)
+		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+	kernel_fpu_begin();
+	chacha_dosimd(state, dst, src, bytes, nrounds);
+	kernel_fpu_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
 static int chacha_simd_stream_xor(struct skcipher_request *req,
 				  const struct chacha_ctx *ctx, const u8 *iv)
 {
@@ -143,7 +180,8 @@ static int chacha_simd_stream_xor(struct skcipher_request *req,
 		if (nbytes < walk.total)
 			nbytes = round_down(nbytes, walk.stride);
 
-		if (!crypto_simd_usable()) {
+		if (!static_branch_likely(&chacha_use_simd) ||
+		    !crypto_simd_usable()) {
 			chacha_crypt_generic(state, walk.dst.virt.addr,
 					     walk.src.virt.addr, nbytes,
 					     ctx->nrounds);
@@ -246,18 +284,21 @@ static struct skcipher_alg algs[] = {
 static int __init chacha_simd_mod_init(void)
 {
 	if (!boot_cpu_has(X86_FEATURE_SSSE3))
-		return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
-	chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
-			  boot_cpu_has(X86_FEATURE_AVX2) &&
-			  cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-#ifdef CONFIG_AS_AVX512
-	chacha_use_avx512vl = chacha_use_avx2 &&
-			      boot_cpu_has(X86_FEATURE_AVX512VL) &&
-			      boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
-#endif
-#endif
+		return 0;
+
+	static_branch_enable(&chacha_use_simd);
+
+	if (IS_ENABLED(CONFIG_AS_AVX2) &&
+	    boot_cpu_has(X86_FEATURE_AVX) &&
+	    boot_cpu_has(X86_FEATURE_AVX2) &&
+	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
+		static_branch_enable(&chacha_use_avx2);
+
+		if (IS_ENABLED(CONFIG_AS_AVX512) &&
+		    boot_cpu_has(X86_FEATURE_AVX512VL) &&
+		    boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
+			static_branch_enable(&chacha_use_avx512vl);
+	}
 	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
 }
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 1abca30ed6ae..07762de1237f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1437,6 +1437,7 @@ config CRYPTO_CHACHA20_X86_64
 	depends on X86 && 64BIT
 	select CRYPTO_SKCIPHER
 	select CRYPTO_LIB_CHACHA_GENERIC
+	select CRYPTO_ARCH_HAVE_LIB_CHACHA
 	help
 	  SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
 	  XChaCha20, and XChaCha12 stream ciphers.
diff --git a/include/crypto/chacha.h b/include/crypto/chacha.h
index 5c662f8fecac..2676f4fbd4c1 100644
--- a/include/crypto/chacha.h
+++ b/include/crypto/chacha.h
@@ -25,6 +25,12 @@
 #define CHACHA_BLOCK_SIZE	64
 #define CHACHAPOLY_IV_SIZE	12
 
+#ifdef CONFIG_X86_64
+#define CHACHA_STATE_WORDS	((CHACHA_BLOCK_SIZE + 12) / sizeof(u32))
+#else
+#define CHACHA_STATE_WORDS	(CHACHA_BLOCK_SIZE / sizeof(u32))
+#endif
+
 /* 192-bit nonce, then 64-bit stream position */
 #define XCHACHA_IV_SIZE		32
 
-- 
2.20.1


  parent reply index

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-08 12:22 [PATCH v5 00/34] crypto: crypto API library interfaces for WireGuard Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 01/34] crypto: tidy up lib/crypto Kconfig and Makefile Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 02/34] crypto: chacha - move existing library code into lib/crypto Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 03/34] crypto: x86/chacha - depend on generic chacha library instead of crypto driver Ard Biesheuvel
2019-11-08 12:22 ` Ard Biesheuvel [this message]
2019-11-08 12:22 ` [PATCH v5 05/34] crypto: arm64/chacha " Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 06/34] crypto: arm64/chacha - expose arm64 ChaCha routine as library function Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 07/34] crypto: arm/chacha - import Eric Biggers's scalar accelerated ChaCha code Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 08/34] crypto: arm/chacha - remove dependency on generic ChaCha driver Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 09/34] crypto: arm/chacha - expose ARM ChaCha routine as library function Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 10/34] crypto: mips/chacha - import 32r2 ChaCha code from Zinc Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 11/34] crypto: mips/chacha - wire up accelerated 32r2 " Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 12/34] crypto: chacha - unexport chacha_generic routines Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 13/34] crypto: poly1305 - move core routines into a separate library Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 14/34] crypto: x86/poly1305 - unify Poly1305 state struct with generic code Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 15/34] crypto: poly1305 - expose init/update/final library interface Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 16/34] crypto: x86/poly1305 - depend on generic library not generic shash Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 17/34] crypto: x86/poly1305 - expose existing driver as poly1305 library Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 18/34] crypto: arm64/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 19/34] crypto: arm/poly1305 " Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 20/34] crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 21/34] int128: move __uint128_t compiler test to Kconfig Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 22/34] crypto: BLAKE2s - generic C library implementation and selftest Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 23/34] crypto: testmgr - add test cases for Blake2s Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 24/34] crypto: blake2s - implement generic shash driver Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 25/34] crypto: BLAKE2s - x86_64 SIMD implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 26/34] crypto: Curve25519 - generic C library implementations Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 27/34] crypto: curve25519 - add kpp selftest Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 28/34] crypto: curve25519 - implement generic KPP driver Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 29/34] crypto: lib/curve25519 - work around Clang stack spilling issue Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 30/34] crypto: Curve25519 - x86_64 library and KPP implementations Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 31/34] crypto: arm - import Bernstein and Schwabe's Curve25519 ARM implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 32/34] crypto: arm/Curve25519 - wire up NEON implementation Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 33/34] crypto: chacha20poly1305 - import construction and selftest from Zinc Ard Biesheuvel
2019-11-08 12:22 ` [PATCH v5 34/34] crypto: lib/chacha20poly1305 - reimplement crypt_from_sg() routine Ard Biesheuvel
2019-11-15  6:07 ` [PATCH v5 00/34] crypto: crypto API library interfaces for WireGuard Herbert Xu
     [not found]   ` <CAHmME9oOfhv6RN00m1c6c5qELC5dzFKS=mgDBQ-stVEWu00p_A@mail.gmail.com>
2019-11-15  9:09     ` Herbert Xu
2019-11-19 15:18       ` Jason A. Donenfeld
2019-11-19 15:34         ` Ard Biesheuvel
2019-11-19 15:44           ` Jason A. Donenfeld
2019-11-19 15:59             ` Ard Biesheuvel
2019-11-19 16:23             ` Eric Biggers
2019-11-19 21:43               ` Jordan Glover

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191108122240.28479-5-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=Jason@zx2c4.com \
    --cc=arnd@arndb.de \
    --cc=davem@davemloft.net \
    --cc=dsterba@suse.com \
    --cc=ebiggers@google.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=linux-crypto@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=martin@strongswan.org \
    --cc=opensource@vdorst.com \
    --cc=sneves@dei.uc.pt \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-Crypto Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-crypto/0 linux-crypto/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-crypto linux-crypto/ https://lore.kernel.org/linux-crypto \
		linux-crypto@vger.kernel.org
	public-inbox-index linux-crypto

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-crypto


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git