linux-arm-kernel.lists.infradead.org archive mirror
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: mark.rutland@arm.com, herbert@gondor.apana.org.au,
	Eric Biggers <ebiggers@google.com>,
	catalin.marinas@arm.com, Dave Martin <dave.martin@arm.com>,
	will@kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	linux-arm-kernel@lists.infradead.org
Subject: [PATCH v2 8/9] crypto: arm64/crc-t10dif - move NEON yield to C code
Date: Wed,  3 Feb 2021 12:36:25 +0100	[thread overview]
Message-ID: <20210203113626.220151-9-ardb@kernel.org> (raw)
In-Reply-To: <20210203113626.220151-1-ardb@kernel.org>

Instead of yielding from the bowels of the asm routine if a reschedule
is needed, divide up the input into 4 KB chunks in the C glue. This
simplifies the code substantially, and avoids scheduling out the task
with the asm routine on the call stack, which is undesirable from a
CFI/instrumentation point of view.
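
As a rough sketch of the resulting pattern (a hypothetical standalone
helper for illustration only -- the actual change is open-coded in the
two update routines in the diff below, and the p8 path is identical
apart from the asm entry point it calls):

  /*
   * Feed the NEON routine at most ~4 KiB at a time, so the task can be
   * preempted between kernel_neon_begin()/kernel_neon_end() sections
   * instead of from inside the asm code. The caller guarantees
   * length >= CRC_T10DIF_PMULL_CHUNK_SIZE.
   */
  static u16 crc_t10dif_in_chunks(u16 crc, const u8 *data,
                                  unsigned int length)
  {
          do {
                  unsigned int chunk = length;

                  /*
                   * Cap each chunk at 4 KiB, but let a slightly larger
                   * remainder through in a single call so the final
                   * chunk never drops below CRC_T10DIF_PMULL_CHUNK_SIZE
                   * (the asm routine assumes len >= 16).
                   */
                  if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE)
                          chunk = SZ_4K;

                  kernel_neon_begin();
                  crc = crc_t10dif_pmull_p64(crc, data, chunk);
                  kernel_neon_end();

                  data += chunk;
                  length -= chunk;
          } while (length);

          return crc;
  }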

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/crct10dif-ce-core.S | 43 +++++---------------
 arch/arm64/crypto/crct10dif-ce-glue.c | 30 +++++++++++---
 2 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index 111d9c9abddd..dce6dcebfca1 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -68,10 +68,10 @@
 	.text
 	.arch		armv8-a+crypto
 
-	init_crc	.req	w19
-	buf		.req	x20
-	len		.req	x21
-	fold_consts_ptr	.req	x22
+	init_crc	.req	w0
+	buf		.req	x1
+	len		.req	x2
+	fold_consts_ptr	.req	x3
 
 	fold_consts	.req	v10
 
@@ -257,12 +257,6 @@ CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
 	.endm
 
 	.macro		crc_t10dif_pmull, p
-	frame_push	4, 128
-
-	mov		init_crc, w0
-	mov		buf, x1
-	mov		len, x2
-
 	__pmull_init_\p
 
 	// For sizes less than 256 bytes, we can't fold 128 bytes at a time.
@@ -317,26 +311,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 	fold_32_bytes	\p, v6, v7
 
 	subs		len, len, #128
-	b.lt		.Lfold_128_bytes_loop_done_\@
-
-	if_will_cond_yield_neon
-	stp		q0, q1, [sp, #.Lframe_local_offset]
-	stp		q2, q3, [sp, #.Lframe_local_offset + 32]
-	stp		q4, q5, [sp, #.Lframe_local_offset + 64]
-	stp		q6, q7, [sp, #.Lframe_local_offset + 96]
-	do_cond_yield_neon
-	ldp		q0, q1, [sp, #.Lframe_local_offset]
-	ldp		q2, q3, [sp, #.Lframe_local_offset + 32]
-	ldp		q4, q5, [sp, #.Lframe_local_offset + 64]
-	ldp		q6, q7, [sp, #.Lframe_local_offset + 96]
-	ld1		{fold_consts.2d}, [fold_consts_ptr]
-	__pmull_init_\p
-	__pmull_pre_\p	fold_consts
-	endif_yield_neon
-
-	b		.Lfold_128_bytes_loop_\@
-
-.Lfold_128_bytes_loop_done_\@:
+	b.ge		.Lfold_128_bytes_loop_\@
 
 	// Now fold the 112 bytes in v0-v6 into the 16 bytes in v7.
 
@@ -453,7 +428,9 @@ CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
 	// Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0.
 
 	umov		w0, v0.h[0]
-	frame_pop
+	.ifc		\p, p8
+	ldp		x29, x30, [sp], #16
+	.endif
 	ret
 
 .Lless_than_256_bytes_\@:
@@ -489,7 +466,9 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 // Assumes len >= 16.
 //
 SYM_FUNC_START(crc_t10dif_pmull_p8)
-	crc_t10dif_pmull	p8
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
+	crc_t10dif_pmull p8
 SYM_FUNC_END(crc_t10dif_pmull_p8)
 
 	.align		5
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index ccc3f6067742..09eb1456aed4 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -37,9 +37,18 @@ static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data,
 	u16 *crc = shash_desc_ctx(desc);
 
 	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
-		kernel_neon_begin();
-		*crc = crc_t10dif_pmull_p8(*crc, data, length);
-		kernel_neon_end();
+		do {
+			unsigned int chunk = length;
+
+			if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE)
+				chunk = SZ_4K;
+
+			kernel_neon_begin();
+			*crc = crc_t10dif_pmull_p8(*crc, data, chunk);
+			kernel_neon_end();
+			data += chunk;
+			length -= chunk;
+		} while (length);
 	} else {
 		*crc = crc_t10dif_generic(*crc, data, length);
 	}
@@ -53,9 +62,18 @@ static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data,
 	u16 *crc = shash_desc_ctx(desc);
 
 	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
-		kernel_neon_begin();
-		*crc = crc_t10dif_pmull_p64(*crc, data, length);
-		kernel_neon_end();
+		do {
+			unsigned int chunk = length;
+
+			if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE)
+				chunk = SZ_4K;
+
+			kernel_neon_begin();
+			*crc = crc_t10dif_pmull_p64(*crc, data, chunk);
+			kernel_neon_end();
+			data += chunk;
+			length -= chunk;
+		} while (length);
 	} else {
 		*crc = crc_t10dif_generic(*crc, data, length);
 	}
-- 
2.30.0



Thread overview: 18+ messages
2021-02-03 11:36 [PATCH v2 0/9] arm64: rework NEON yielding to avoid scheduling from asm code Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 1/9] arm64: assembler: add cond_yield macro Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 2/9] crypto: arm64/sha1-ce - simplify NEON yield Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 3/9] crypto: arm64/sha2-ce " Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 4/9] crypto: arm64/sha3-ce " Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 5/9] crypto: arm64/sha512-ce " Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 6/9] crypto: arm64/aes-neonbs - remove NEON yield calls Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 7/9] crypto: arm64/aes-ce-mac - simplify NEON yield Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel [this message]
2021-02-03 11:36 ` [PATCH v2 9/9] arm64: assembler: remove conditional NEON yield macros Ard Biesheuvel
2021-02-03 21:31 ` (subset) Re: [PATCH v2 0/9] arm64: rework NEON yielding to avoid scheduling from asm code Will Deacon
2021-02-04  2:44   ` Herbert Xu
2021-02-04  8:29     ` Ard Biesheuvel
2021-02-04 11:10       ` Herbert Xu
2021-02-04 13:03         ` Will Deacon
2021-02-04 19:45           ` Herbert Xu
2021-02-04 10:33   ` Will Deacon
2021-02-10  7:23 ` Herbert Xu
