From: Robert Elliott <elliott@hpe.com>
To: herbert@gondor.apana.org.au, davem@davemloft.net,
tim.c.chen@linux.intel.com, linux-crypto@vger.kernel.org,
linux-kernel@vger.kernel.org
Cc: Robert Elliott <elliott@hpe.com>
Subject: [RFC PATCH 2/7] crypto: x86/sha - limit FPU preemption
Date: Thu, 6 Oct 2022 17:31:46 -0500 [thread overview]
Message-ID: <20221006223151.22159-3-elliott@hpe.com> (raw)
In-Reply-To: <20221006223151.22159-1-elliott@hpe.com>
As done by the ECB and CBC helpers in arch/x86/crypt/ecb_cbc_helpers.h,
limit the number of bytes processed between kernel_fpu_begin() and
kernel_fpu_end() calls.
Those functions call preempt_disable() and preempt_enable(), so
the CPU core is unavailable for scheduling while running.
This leads to "rcu_preempt detected expedited stalls" with stack dumps
pointing to the optimized hash function if this module is loaded and
used a lot:
rcu: INFO: rcu_preempt detected expedited stalls on CPUs/tasks: {12-... } 22 jiffies s: 277 root: 0x1/.
For example, that can occur during boot with the stack track pointing
to the sha512-x86 function if the system set to use SHA-512 for
module signing. The call trace includes:
module_sig_check
mod_verify_sig
pkcs7_verify
pkcs7_digest
sha512_finup
sha512_base_do_update
Fixes: 66be89515888 ("crypto: sha1 - SSSE3 based SHA1 implementation for x86-64")
Fixes: 8275d1aa6422 ("crypto: sha256 - Create module providing optimized SHA256 routines using SSSE3, AVX or AVX2 instructions.")
Fixes: 87de4579f92d ("crypto: sha512 - Create module providing optimized SHA512 routines using SSSE3, AVX or AVX2 instructions.")
Fixes: aa031b8f702e ("crypto: x86/sha512 - load based on CPU features")
Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Robert Elliott <elliott@hpe.com>
---
arch/x86/crypto/sha1_ssse3_glue.c | 34 +++++++++++++++++++++++-----
arch/x86/crypto/sha256_ssse3_glue.c | 35 ++++++++++++++++++++++++-----
arch/x86/crypto/sha512_ssse3_glue.c | 35 ++++++++++++++++++++++++-----
3 files changed, 89 insertions(+), 15 deletions(-)
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 4430463dee62..033812989476 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -27,10 +27,13 @@
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
+#define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+
static int sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len, sha1_block_fn *sha1_xform)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
+ unsigned int chunk;
if (!crypto_simd_usable() ||
(sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
@@ -42,9 +45,18 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
*/
BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
- kernel_fpu_begin();
- sha1_base_do_update(desc, data, len, sha1_xform);
- kernel_fpu_end();
+ do {
+ chunk = min(len, FPU_BYTES);
+ len -= chunk;
+
+ if (chunk) {
+ kernel_fpu_begin();
+ sha1_base_do_update(desc, data, chunk, sha1_xform);
+ kernel_fpu_end();
+ }
+
+ data += chunk;
+ } while (len);
return 0;
}
@@ -52,12 +64,24 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
static int sha1_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out, sha1_block_fn *sha1_xform)
{
+ unsigned int chunk;
+
if (!crypto_simd_usable())
return crypto_sha1_finup(desc, data, len, out);
+ do {
+ chunk = min(len, FPU_BYTES);
+ len -= chunk;
+
+ if (chunk) {
+ kernel_fpu_begin();
+ sha1_base_do_update(desc, data, chunk, sha1_xform);
+ kernel_fpu_end();
+ }
+ data += chunk;
+ } while (len);
+
kernel_fpu_begin();
- if (len)
- sha1_base_do_update(desc, data, len, sha1_xform);
sha1_base_do_finalize(desc, sha1_xform);
kernel_fpu_end();
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index e437fba0299b..99a25c238f40 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -40,6 +40,8 @@
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
+#define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+
asmlinkage void sha256_transform_ssse3(struct sha256_state *state,
const u8 *data, int blocks);
@@ -47,6 +49,7 @@ static int _sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len, sha256_block_fn *sha256_xform)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
+ unsigned int chunk;
if (!crypto_simd_usable() ||
(sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
@@ -58,9 +61,18 @@ static int _sha256_update(struct shash_desc *desc, const u8 *data,
*/
BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
- kernel_fpu_begin();
- sha256_base_do_update(desc, data, len, sha256_xform);
- kernel_fpu_end();
+ do {
+ chunk = min(len, FPU_BYTES);
+ len -= chunk;
+
+ if (chunk) {
+ kernel_fpu_begin();
+ sha256_base_do_update(desc, data, chunk, sha256_xform);
+ kernel_fpu_end();
+ }
+
+ data += chunk;
+ } while (len);
return 0;
}
@@ -68,12 +80,25 @@ static int _sha256_update(struct shash_desc *desc, const u8 *data,
static int sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out, sha256_block_fn *sha256_xform)
{
+ unsigned int chunk;
+
if (!crypto_simd_usable())
return crypto_sha256_finup(desc, data, len, out);
+ do {
+ chunk = min(len, FPU_BYTES);
+ len -= chunk;
+
+ if (chunk) {
+ kernel_fpu_begin();
+ sha256_base_do_update(desc, data, chunk, sha256_xform);
+ kernel_fpu_end();
+ }
+
+ data += chunk;
+ } while (len);
+
kernel_fpu_begin();
- if (len)
- sha256_base_do_update(desc, data, len, sha256_xform);
sha256_base_do_finalize(desc, sha256_xform);
kernel_fpu_end();
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 3c19f803f288..72eee03448dc 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -39,6 +39,8 @@
#include <asm/cpu_device_id.h>
#include <asm/simd.h>
+#define FPU_BYTES 4096U /* avoid kernel_fpu_begin/end scheduler/rcu stalls */
+
asmlinkage void sha512_transform_ssse3(struct sha512_state *state,
const u8 *data, int blocks);
@@ -46,6 +48,7 @@ static int sha512_update(struct shash_desc *desc, const u8 *data,
unsigned int len, sha512_block_fn *sha512_xform)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
+ unsigned int chunk;
if (!crypto_simd_usable() ||
(sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
@@ -57,9 +60,18 @@ static int sha512_update(struct shash_desc *desc, const u8 *data,
*/
BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
- kernel_fpu_begin();
- sha512_base_do_update(desc, data, len, sha512_xform);
- kernel_fpu_end();
+ do {
+ chunk = min(len, FPU_BYTES);
+ len -= chunk;
+
+ if (chunk) {
+ kernel_fpu_begin();
+ sha512_base_do_update(desc, data, chunk, sha512_xform);
+ kernel_fpu_end();
+ }
+
+ data += chunk;
+ } while (len);
return 0;
}
@@ -67,12 +79,25 @@ static int sha512_update(struct shash_desc *desc, const u8 *data,
static int sha512_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out, sha512_block_fn *sha512_xform)
{
+ unsigned int chunk;
+
if (!crypto_simd_usable())
return crypto_sha512_finup(desc, data, len, out);
+ do {
+ chunk = min(len, FPU_BYTES);
+ len -= chunk;
+
+ if (chunk) {
+ kernel_fpu_begin();
+ sha512_base_do_update(desc, data, chunk, sha512_xform);
+ kernel_fpu_end();
+ }
+
+ data += chunk;
+ } while (len);
+
kernel_fpu_begin();
- if (len)
- sha512_base_do_update(desc, data, len, sha512_xform);
sha512_base_do_finalize(desc, sha512_xform);
kernel_fpu_end();
--
2.37.3
next prev parent reply other threads:[~2022-10-06 22:32 UTC|newest]
Thread overview: 126+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-06 22:31 [RFC PATCH 0/7] crypto: x86 - fix RCU stalls Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 1/7] rcu: correct CONFIG_EXT_RCU_CPU_STALL_TIMEOUT descriptions Robert Elliott
2022-10-06 22:31 ` Robert Elliott [this message]
2022-10-06 22:31 ` [RFC PATCH 3/7] crypto: x86/crc - limit FPU preemption Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 4/7] crypto: x86/sm3 " Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 5/7] crypto: x86/ghash - restructure FPU context saving Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 6/7] crypto: x86/ghash - limit FPU preemption Robert Elliott
2022-10-06 22:31 ` [RFC PATCH 7/7] crypto: x86 - use common macro for FPU limit Robert Elliott
2022-10-12 21:59 ` [PATCH v2 00/19] crypto: x86 - fix RCU stalls Robert Elliott
2022-10-12 21:59 ` [PATCH v2 01/19] crypto: tcrypt - test crc32 Robert Elliott
2022-10-12 21:59 ` [PATCH v2 02/19] crypto: tcrypt - test nhpoly1305 Robert Elliott
2022-10-12 21:59 ` [PATCH v2 03/19] crypto: tcrypt - reschedule during cycles speed tests Robert Elliott
2022-10-12 21:59 ` [PATCH v2 04/19] crypto: x86/sha - limit FPU preemption Robert Elliott
2022-10-13 0:41 ` Jason A. Donenfeld
2022-10-13 21:50 ` Elliott, Robert (Servers)
2022-10-14 11:01 ` David Laight
2022-10-13 5:57 ` Eric Biggers
2022-10-13 6:04 ` Herbert Xu
2022-10-13 6:08 ` Eric Biggers
2022-10-13 7:50 ` Herbert Xu
2022-10-13 22:41 ` :Re: " Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 05/19] crypto: x86/crc " Robert Elliott
2022-10-13 2:00 ` Herbert Xu
2022-10-13 22:34 ` Elliott, Robert (Servers)
2022-10-14 4:02 ` David Laight
2022-10-24 2:03 ` kernel test robot
2022-10-12 21:59 ` [PATCH v2 06/19] crypto: x86/sm3 " Robert Elliott
2022-10-12 21:59 ` [PATCH v2 07/19] crypto: x86/ghash - restructure FPU context saving Robert Elliott
2022-10-12 21:59 ` [PATCH v2 08/19] crypto: x86/ghash - limit FPU preemption Robert Elliott
2022-10-13 6:03 ` Eric Biggers
2022-10-13 22:52 ` Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 09/19] crypto: x86 - use common macro for FPU limit Robert Elliott
2022-10-13 0:35 ` Jason A. Donenfeld
2022-10-13 21:48 ` Elliott, Robert (Servers)
2022-10-14 1:26 ` Jason A. Donenfeld
2022-10-18 0:06 ` Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 10/19] crypto: x86/sha1, sha256 - load based on CPU features Robert Elliott
2022-10-12 21:59 ` [PATCH v2 11/19] crypto: x86/crc " Robert Elliott
2022-10-12 21:59 ` [PATCH v2 12/19] crypto: x86/sm3 " Robert Elliott
2022-10-12 21:59 ` [PATCH v2 13/19] crypto: x86/ghash " Robert Elliott
2022-10-12 21:59 ` [PATCH v2 14/19] crypto: x86 " Robert Elliott
2022-10-14 14:26 ` Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 15/19] crypto: x86 - add pr_fmt to all modules Robert Elliott
2022-10-12 21:59 ` [PATCH v2 16/19] crypto: x86 - print CPU optimized loaded messages Robert Elliott
2022-10-13 0:40 ` Jason A. Donenfeld
2022-10-13 13:47 ` kernel test robot
2022-10-13 13:48 ` kernel test robot
2022-10-12 21:59 ` [PATCH v2 17/19] crypto: x86 - standardize suboptimal prints Robert Elliott
2022-10-13 0:38 ` Jason A. Donenfeld
2022-10-12 21:59 ` [PATCH v2 18/19] crypto: x86 - standardize not loaded prints Robert Elliott
2022-10-13 0:42 ` Jason A. Donenfeld
2022-10-13 22:20 ` Elliott, Robert (Servers)
2022-11-10 22:06 ` Elliott, Robert (Servers)
2022-10-12 21:59 ` [PATCH v2 19/19] crypto: x86/sha - register only the best function Robert Elliott
2022-10-13 6:07 ` Eric Biggers
2022-10-13 7:52 ` Herbert Xu
2022-10-13 22:59 ` Elliott, Robert (Servers)
2022-10-14 8:22 ` Herbert Xu
2022-11-01 21:34 ` [PATCH v2 00/19] crypto: x86 - fix RCU stalls Elliott, Robert (Servers)
2022-11-03 4:27 ` [PATCH v3 00/17] crypt: " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 01/17] crypto: tcrypt - test crc32 Robert Elliott
2022-11-03 4:27 ` [PATCH v3 02/17] crypto: tcrypt - test nhpoly1305 Robert Elliott
2022-11-03 4:27 ` [PATCH v3 03/17] crypto: tcrypt - reschedule during cycles speed tests Robert Elliott
2022-11-03 4:27 ` [PATCH v3 04/17] crypto: x86/sha - limit FPU preemption Robert Elliott
2022-11-03 4:27 ` [PATCH v3 05/17] crypto: x86/crc " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 06/17] crypto: x86/sm3 " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 07/17] crypto: x86/ghash - use u8 rather than char Robert Elliott
2022-11-03 4:27 ` [PATCH v3 08/17] crypto: x86/ghash - restructure FPU context saving Robert Elliott
2022-11-03 4:27 ` [PATCH v3 09/17] crypto: x86/ghash - limit FPU preemption Robert Elliott
2022-11-03 4:27 ` [PATCH v3 10/17] crypto: x86/*poly* " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 11/17] crypto: x86/sha - register all variations Robert Elliott
2022-11-03 9:26 ` kernel test robot
2022-11-03 4:27 ` [PATCH v3 12/17] crypto: x86/sha - minimize time in FPU context Robert Elliott
2022-11-03 4:27 ` [PATCH v3 13/17] crypto: x86/sha1, sha256 - load based on CPU features Robert Elliott
2022-11-03 4:27 ` [PATCH v3 14/17] crypto: x86/crc " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 15/17] crypto: x86/sm3 " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 16/17] crypto: x86/ghash,polyval " Robert Elliott
2022-11-03 4:27 ` [PATCH v3 17/17] crypto: x86/nhpoly1305, poly1305 " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 00/24] crypto: fix RCU stalls Robert Elliott
2022-11-16 4:13 ` [PATCH v4 01/24] crypto: tcrypt - test crc32 Robert Elliott
2022-11-16 4:13 ` [PATCH v4 02/24] crypto: tcrypt - test nhpoly1305 Robert Elliott
2022-11-16 4:13 ` [PATCH v4 03/24] crypto: tcrypt - reschedule during cycles speed tests Robert Elliott
2022-11-16 4:13 ` [PATCH v4 04/24] crypto: x86/sha - limit FPU preemption Robert Elliott
2022-11-16 4:13 ` [PATCH v4 05/24] crypto: x86/crc " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 06/24] crypto: x86/sm3 " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 07/24] crypto: x86/ghash - use u8 rather than char Robert Elliott
2022-11-16 4:13 ` [PATCH v4 08/24] crypto: x86/ghash - restructure FPU context saving Robert Elliott
2022-11-16 4:13 ` [PATCH v4 09/24] crypto: x86/ghash - limit FPU preemption Robert Elliott
2022-11-16 4:13 ` [PATCH v4 10/24] crypto: x86/poly " Robert Elliott
2022-11-16 11:13 ` Jason A. Donenfeld
2022-11-22 5:06 ` Elliott, Robert (Servers)
2022-11-22 9:07 ` David Laight
2022-11-25 8:40 ` Herbert Xu
2022-11-25 8:59 ` Ard Biesheuvel
2022-11-25 9:03 ` Herbert Xu
2022-11-28 16:57 ` Elliott, Robert (Servers)
2022-11-28 18:48 ` Elliott, Robert (Servers)
2022-12-02 6:21 ` Elliott, Robert (Servers)
2022-12-02 9:25 ` Herbert Xu
2022-12-02 16:15 ` Elliott, Robert (Servers)
2022-12-06 4:27 ` Herbert Xu
2022-12-06 14:03 ` Peter Lafreniere
2022-12-06 14:44 ` David Laight
2022-12-06 23:06 ` Peter Lafreniere
2022-12-10 0:34 ` Elliott, Robert (Servers)
2022-12-16 22:12 ` Elliott, Robert (Servers)
2022-11-16 4:13 ` [PATCH v4 11/24] crypto: x86/aegis " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 12/24] crypto: x86/sha - register all variations Robert Elliott
2022-11-16 4:13 ` [PATCH v4 13/24] crypto: x86/sha - minimize time in FPU context Robert Elliott
2022-11-16 4:13 ` [PATCH v4 14/24] crypto: x86/sha - load based on CPU features Robert Elliott
2022-11-16 4:13 ` [PATCH v4 15/24] crypto: x86/crc " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 16/24] crypto: x86/sm3 " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 17/24] crypto: x86/poly " Robert Elliott
2022-11-16 11:19 ` Jason A. Donenfeld
2022-11-16 4:13 ` [PATCH v4 18/24] crypto: x86/ghash " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 19/24] crypto: x86/aesni - avoid type conversions Robert Elliott
2022-11-16 4:13 ` [PATCH v4 20/24] crypto: x86/ciphers - load based on CPU features Robert Elliott
2022-11-16 11:30 ` Jason A. Donenfeld
2022-11-16 4:13 ` [PATCH v4 21/24] crypto: x86 - report used CPU features via module parameters Robert Elliott
2022-11-16 11:26 ` Jason A. Donenfeld
2022-11-16 4:13 ` [PATCH v4 22/24] crypto: x86 - report missing " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 23/24] crypto: x86 - report suboptimal CPUs " Robert Elliott
2022-11-16 4:13 ` [PATCH v4 24/24] crypto: x86 - standarize module descriptions Robert Elliott
2022-11-17 3:58 ` [PATCH v4 00/24] crypto: fix RCU stalls Herbert Xu
2022-11-17 15:13 ` Elliott, Robert (Servers)
2022-11-17 15:15 ` Jason A. Donenfeld
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221006223151.22159-3-elliott@hpe.com \
--to=elliott@hpe.com \
--cc=davem@davemloft.net \
--cc=herbert@gondor.apana.org.au \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tim.c.chen@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).