From: Robert Elliott <elliott@hpe.com>
To: herbert@gondor.apana.org.au, davem@davemloft.net,
tim.c.chen@linux.intel.com, ap420073@gmail.com, ardb@kernel.org,
Jason@zx2c4.com, David.Laight@ACULAB.COM, ebiggers@kernel.org,
linux-crypto@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: Robert Elliott <elliott@hpe.com>
Subject: [PATCH v4 10/24] crypto: x86/poly - limit FPU preemption
Date: Tue, 15 Nov 2022 22:13:28 -0600 [thread overview]
Message-ID: <20221116041342.3841-11-elliott@hpe.com> (raw)
In-Reply-To: <20221116041342.3841-1-elliott@hpe.com>
Use a static const unsigned int for the limit on the number of bytes
processed between kernel_fpu_begin() and kernel_fpu_end(), rather than
the SZ_4K macro (which is a signed value) or a magic value of 4096U
embedded in the C code. The limit is now defined per module (199 KiB
to 393 KiB, depending on the algorithm) instead of a fixed 4 KiB.
Use unsigned int rather than size_t for some of the arguments to
avoid typecasting for the min() macro.
Signed-off-by: Robert Elliott <elliott@hpe.com>
---
v3: use a static const unsigned int rather than a macro; change
do/while loops to while loops
---
arch/x86/crypto/nhpoly1305-avx2-glue.c | 11 +++++---
arch/x86/crypto/nhpoly1305-sse2-glue.c | 11 +++++---
arch/x86/crypto/poly1305_glue.c | 37 +++++++++++++++++---------
arch/x86/crypto/polyval-clmulni_glue.c | 8 ++++--
4 files changed, 46 insertions(+), 21 deletions(-)
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
index 8ea5ab0f1ca7..f7dc9c563bb5 100644
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -13,6 +13,9 @@
#include <linux/sizes.h>
#include <asm/simd.h>
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+static const unsigned int bytes_per_fpu = 337 * 1024;
+
asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
u8 hash[NH_HASH_BYTES]);
@@ -26,18 +29,20 @@ static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len,
static int nhpoly1305_avx2_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
+ BUILD_BUG_ON(bytes_per_fpu == 0);
+
if (srclen < 64 || !crypto_simd_usable())
return crypto_nhpoly1305_update(desc, src, srclen);
- do {
- unsigned int n = min_t(unsigned int, srclen, SZ_4K);
+ while (srclen) {
+ unsigned int n = min(srclen, bytes_per_fpu);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2);
kernel_fpu_end();
src += n;
srclen -= n;
- } while (srclen);
+ }
return 0;
}
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
index 2b353d42ed13..daffcc7019ad 100644
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -13,6 +13,9 @@
#include <linux/sizes.h>
#include <asm/simd.h>
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+static const unsigned int bytes_per_fpu = 199 * 1024;
+
asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
u8 hash[NH_HASH_BYTES]);
@@ -26,18 +29,20 @@ static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len,
static int nhpoly1305_sse2_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
+ BUILD_BUG_ON(bytes_per_fpu == 0);
+
if (srclen < 64 || !crypto_simd_usable())
return crypto_nhpoly1305_update(desc, src, srclen);
- do {
- unsigned int n = min_t(unsigned int, srclen, SZ_4K);
+ while (srclen) {
+ unsigned int n = min(srclen, bytes_per_fpu);
kernel_fpu_begin();
crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2);
kernel_fpu_end();
src += n;
srclen -= n;
- } while (srclen);
+ }
return 0;
}
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 1dfb8af48a3c..16831c036d71 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -15,20 +15,27 @@
#include <asm/intel-family.h>
#include <asm/simd.h>
+#define POLY1305_BLOCK_SIZE_MASK (~(POLY1305_BLOCK_SIZE - 1))
+
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+static const unsigned int bytes_per_fpu = 217 * 1024;
+
asmlinkage void poly1305_init_x86_64(void *ctx,
const u8 key[POLY1305_BLOCK_SIZE]);
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
- const size_t len, const u32 padbit);
+ const unsigned int len,
+ const u32 padbit);
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
const u32 nonce[4]);
asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
const u32 nonce[4]);
-asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
- const u32 padbit);
-asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
- const u32 padbit);
+asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp,
+ const unsigned int len, const u32 padbit);
+asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp,
+ const unsigned int len, const u32 padbit);
asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
- const size_t len, const u32 padbit);
+ const unsigned int len,
+ const u32 padbit);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
@@ -86,14 +93,12 @@ static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE])
poly1305_init_x86_64(ctx, key);
}
-static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
+static void poly1305_simd_blocks(void *ctx, const u8 *inp, unsigned int len,
const u32 padbit)
{
struct poly1305_arch_internal *state = ctx;
- /* SIMD disables preemption, so relax after processing each page. */
- BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
- SZ_4K % POLY1305_BLOCK_SIZE);
+ BUILD_BUG_ON(bytes_per_fpu < POLY1305_BLOCK_SIZE);
if (!static_branch_likely(&poly1305_use_avx) ||
(len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
@@ -103,8 +108,14 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
return;
}
- do {
- const size_t bytes = min_t(size_t, len, SZ_4K);
+ while (len) {
+ unsigned int bytes;
+
+ if (len < POLY1305_BLOCK_SIZE)
+ bytes = len;
+ else
+ bytes = min(len,
+ bytes_per_fpu & POLY1305_BLOCK_SIZE_MASK);
kernel_fpu_begin();
if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
@@ -117,7 +128,7 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
len -= bytes;
inp += bytes;
- } while (len);
+ }
}
static void poly1305_simd_emit(void *ctx, u8 mac[POLY1305_DIGEST_SIZE],
diff --git a/arch/x86/crypto/polyval-clmulni_glue.c b/arch/x86/crypto/polyval-clmulni_glue.c
index b7664d018851..de1c908f7412 100644
--- a/arch/x86/crypto/polyval-clmulni_glue.c
+++ b/arch/x86/crypto/polyval-clmulni_glue.c
@@ -29,6 +29,9 @@
#define NUM_KEY_POWERS 8
+/* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+static const unsigned int bytes_per_fpu = 393 * 1024;
+
struct polyval_tfm_ctx {
/*
* These powers must be in the order h^8, ..., h^1.
@@ -107,6 +110,8 @@ static int polyval_x86_update(struct shash_desc *desc,
unsigned int nblocks;
unsigned int n;
+ BUILD_BUG_ON(bytes_per_fpu < POLYVAL_BLOCK_SIZE);
+
if (dctx->bytes) {
n = min(srclen, dctx->bytes);
pos = dctx->buffer + POLYVAL_BLOCK_SIZE - dctx->bytes;
@@ -123,8 +128,7 @@ static int polyval_x86_update(struct shash_desc *desc,
}
while (srclen >= POLYVAL_BLOCK_SIZE) {
- /* Allow rescheduling every 4K bytes. */
- nblocks = min(srclen, 4096U) / POLYVAL_BLOCK_SIZE;
+ nblocks = min(srclen, bytes_per_fpu) / POLYVAL_BLOCK_SIZE;
internal_polyval_update(tctx, src, nblocks, dctx->buffer);
srclen -= nblocks * POLYVAL_BLOCK_SIZE;
src += nblocks * POLYVAL_BLOCK_SIZE;
--
2.38.1
next prev parent reply other threads:[~2022-11-16 4:14 UTC|newest]
Thread overview: 126+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-06 22:31 [RFC PATCH 0/7] crypto: x86 - fix RCU stalls Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 1/7] rcu: correct CONFIG_EXT_RCU_CPU_STALL_TIMEOUT descriptions Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 2/7] crypto: x86/sha - limit FPU preemption Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 3/7] crypto: x86/crc " Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 4/7] crypto: x86/sm3 " Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 5/7] crypto: x86/ghash - restructure FPU context saving Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 6/7] crypto: x86/ghash - limit FPU preemption Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 7/7] crypto: x86 - use common macro for FPU limit Robert Elliott
2022-10-12 21:59 ` [PATCH v2 00/19] crypto: x86 - fix RCU stalls Robert Elliott
2022-10-12 21:59 ` [PATCH v2 01/19] crypto: tcrypt - test crc32 Robert Elliott
2022-10-12 21:59 ` [PATCH v2 02/19] crypto: tcrypt - test nhpoly1305 Robert Elliott
2022-10-12 21:59 ` [PATCH v2 03/19] crypto: tcrypt - reschedule during cycles speed tests Robert Elliott
2022-10-12 21:59 ` [PATCH v2 04/19] crypto: x86/sha - limit FPU preemption Robert Elliott
2022-10-13 0:41 ` Jason A. Donenfeld
2022-10-13 21:50 ` Elliott, Robert (Servers)
2022-10-14 11:01 ` David Laight
2022-10-13 5:57 ` Eric Biggers
2022-10-13 6:04 ` Herbert Xu
2022-10-13 6:08 ` Eric Biggers
2022-10-13 7:50 ` Herbert Xu
2022-10-13 22:41 ` :Re: " Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 05/19] crypto: x86/crc " Robert Elliott
2022-10-13 2:00 ` Herbert Xu
2022-10-13 22:34 ` Elliott, Robert (Servers)
2022-10-14 4:02 ` David Laight
2022-10-24 2:03 ` kernel test robot
2022-10-12 21:59 ` [PATCH v2 06/19] crypto: x86/sm3 " Robert Elliott
2022-10-12 21:59 ` [PATCH v2 07/19] crypto: x86/ghash - restructure FPU context saving Robert Elliott
2022-10-12 21:59 ` [PATCH v2 08/19] crypto: x86/ghash - limit FPU preemption Robert Elliott
2022-10-13 6:03 ` Eric Biggers
2022-10-13 22:52 ` Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 09/19] crypto: x86 - use common macro for FPU limit Robert Elliott
2022-10-13 0:35 ` Jason A. Donenfeld
2022-10-13 21:48 ` Elliott, Robert (Servers)
2022-10-14 1:26 ` Jason A. Donenfeld
2022-10-18 0:06 ` Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 10/19] crypto: x86/sha1, sha256 - load based on CPU features Robert Elliott
2022-10-12 21:59 ` [PATCH v2 11/19] crypto: x86/crc " Robert Elliott
2022-10-12 21:59 ` [PATCH v2 12/19] crypto: x86/sm3 " Robert Elliott
2022-10-12 21:59 ` [PATCH v2 13/19] crypto: x86/ghash " Robert Elliott
2022-10-12 21:59 ` [PATCH v2 14/19] crypto: x86 " Robert Elliott
2022-10-14 14:26 ` Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 15/19] crypto: x86 - add pr_fmt to all modules Robert Elliott
2022-10-12 21:59 ` [PATCH v2 16/19] crypto: x86 - print CPU optimized loaded messages Robert Elliott
2022-10-13 0:40 ` Jason A. Donenfeld
2022-10-13 13:47 ` kernel test robot
2022-10-13 13:48 ` kernel test robot
2022-10-12 21:59 ` [PATCH v2 17/19] crypto: x86 - standardize suboptimal prints Robert Elliott
2022-10-13 0:38 ` Jason A. Donenfeld
2022-10-12 21:59 ` [PATCH v2 18/19] crypto: x86 - standardize not loaded prints Robert Elliott
2022-10-13 0:42 ` Jason A. Donenfeld
2022-10-13 22:20 ` Elliott, Robert (Servers)
2022-11-10 22:06 ` Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 19/19] crypto: x86/sha - register only the best function Robert Elliott
2022-10-13 6:07 ` Eric Biggers
2022-10-13 7:52 ` Herbert Xu
2022-10-13 22:59 ` Elliott, Robert (Servers)
2022-10-14 8:22 ` Herbert Xu
2022-11-01 21:34 ` [PATCH v2 00/19] crypto: x86 - fix RCU stalls Elliott, Robert (Servers)
2022-11-03 4:27 ` [PATCH v3 00/17] crypt: " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 01/17] crypto: tcrypt - test crc32 Robert Elliott
2022-11-03 4:27 ` [PATCH v3 02/17] crypto: tcrypt - test nhpoly1305 Robert Elliott
2022-11-03 4:27 ` [PATCH v3 03/17] crypto: tcrypt - reschedule during cycles speed tests Robert Elliott
2022-11-03 4:27 ` [PATCH v3 04/17] crypto: x86/sha - limit FPU preemption Robert Elliott
2022-11-03 4:27 ` [PATCH v3 05/17] crypto: x86/crc " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 06/17] crypto: x86/sm3 " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 07/17] crypto: x86/ghash - use u8 rather than char Robert Elliott
2022-11-03 4:27 ` [PATCH v3 08/17] crypto: x86/ghash - restructure FPU context saving Robert Elliott
2022-11-03 4:27 ` [PATCH v3 09/17] crypto: x86/ghash - limit FPU preemption Robert Elliott
2022-11-03 4:27 ` [PATCH v3 10/17] crypto: x86/*poly* " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 11/17] crypto: x86/sha - register all variations Robert Elliott
2022-11-03 9:26 ` kernel test robot
2022-11-03 4:27 ` [PATCH v3 12/17] crypto: x86/sha - minimize time in FPU context Robert Elliott
2022-11-03 4:27 ` [PATCH v3 13/17] crypto: x86/sha1, sha256 - load based on CPU features Robert Elliott
2022-11-03 4:27 ` [PATCH v3 14/17] crypto: x86/crc " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 15/17] crypto: x86/sm3 " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 16/17] crypto: x86/ghash,polyval " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 17/17] crypto: x86/nhpoly1305, poly1305 " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 00/24] crypto: fix RCU stalls Robert Elliott
2022-11-16 4:13 ` [PATCH v4 01/24] crypto: tcrypt - test crc32 Robert Elliott
2022-11-16 4:13 ` [PATCH v4 02/24] crypto: tcrypt - test nhpoly1305 Robert Elliott
2022-11-16 4:13 ` [PATCH v4 03/24] crypto: tcrypt - reschedule during cycles speed tests Robert Elliott
2022-11-16 4:13 ` [PATCH v4 04/24] crypto: x86/sha - limit FPU preemption Robert Elliott
2022-11-16 4:13 ` [PATCH v4 05/24] crypto: x86/crc " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 06/24] crypto: x86/sm3 " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 07/24] crypto: x86/ghash - use u8 rather than char Robert Elliott
2022-11-16 4:13 ` [PATCH v4 08/24] crypto: x86/ghash - restructure FPU context saving Robert Elliott
2022-11-16 4:13 ` [PATCH v4 09/24] crypto: x86/ghash - limit FPU preemption Robert Elliott
2022-11-16 4:13 ` Robert Elliott [this message]
2022-11-16 11:13 ` [PATCH v4 10/24] crypto: x86/poly " Jason A. Donenfeld
2022-11-22 5:06 ` Elliott, Robert (Servers)
2022-11-22 9:07 ` David Laight
2022-11-25 8:40 ` Herbert Xu
2022-11-25 8:59 ` Ard Biesheuvel
2022-11-25 9:03 ` Herbert Xu
2022-11-28 16:57 ` Elliott, Robert (Servers)
2022-11-28 18:48 ` Elliott, Robert (Servers)
2022-12-02 6:21 ` Elliott, Robert (Servers)
2022-12-02 9:25 ` Herbert Xu
2022-12-02 16:15 ` Elliott, Robert (Servers)
2022-12-06 4:27 ` Herbert Xu
2022-12-06 14:03 ` Peter Lafreniere
2022-12-06 14:44 ` David Laight
2022-12-06 23:06 ` Peter Lafreniere
2022-12-10 0:34 ` Elliott, Robert (Servers)
2022-12-16 22:12 ` Elliott, Robert (Servers)
2022-11-16 4:13 ` [PATCH v4 11/24] crypto: x86/aegis " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 12/24] crypto: x86/sha - register all variations Robert Elliott
2022-11-16 4:13 ` [PATCH v4 13/24] crypto: x86/sha - minimize time in FPU context Robert Elliott
2022-11-16 4:13 ` [PATCH v4 14/24] crypto: x86/sha - load based on CPU features Robert Elliott
2022-11-16 4:13 ` [PATCH v4 15/24] crypto: x86/crc " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 16/24] crypto: x86/sm3 " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 17/24] crypto: x86/poly " Robert Elliott
2022-11-16 11:19 ` Jason A. Donenfeld
2022-11-16 4:13 ` [PATCH v4 18/24] crypto: x86/ghash " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 19/24] crypto: x86/aesni - avoid type conversions Robert Elliott
2022-11-16 4:13 ` [PATCH v4 20/24] crypto: x86/ciphers - load based on CPU features Robert Elliott
2022-11-16 11:30 ` Jason A. Donenfeld
2022-11-16 4:13 ` [PATCH v4 21/24] crypto: x86 - report used CPU features via module parameters Robert Elliott
2022-11-16 11:26 ` Jason A. Donenfeld
2022-11-16 4:13 ` [PATCH v4 22/24] crypto: x86 - report missing " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 23/24] crypto: x86 - report suboptimal CPUs " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 24/24] crypto: x86 - standarize module descriptions Robert Elliott
2022-11-17 3:58 ` [PATCH v4 00/24] crypto: fix RCU stalls Herbert Xu
2022-11-17 15:13 ` Elliott, Robert (Servers)
2022-11-17 15:15 ` Jason A. Donenfeld
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221116041342.3841-11-elliott@hpe.com \
--to=elliott@hpe.com \
--cc=David.Laight@ACULAB.COM \
--cc=Jason@zx2c4.com \
--cc=ap420073@gmail.com \
--cc=ardb@kernel.org \
--cc=davem@davemloft.net \
--cc=ebiggers@kernel.org \
--cc=herbert@gondor.apana.org.au \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tim.c.chen@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).