All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	Dave Martin <dave.martin@arm.com>,
	Mark Brown <broonie@kernel.org>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	Eric Biggers <ebiggers@kernel.org>, Will Deacon <will@kernel.org>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Peter Zijlstra <peterz@infradead.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	Ingo Molnar <mingo@kernel.org>
Subject: [RFC PATCH 3/5] crypto: arm64/gcm-aes-ce - add NEON yield support
Date: Fri, 18 Dec 2020 18:01:04 +0100	[thread overview]
Message-ID: <20201218170106.23280-4-ardb@kernel.org> (raw)
In-Reply-To: <20201218170106.23280-1-ardb@kernel.org>

GCM mode is typically used for IPsec, but is now also used for software
kTLS, which means that we may end up operating on much larger inputs.

So let's wire up conditional NEON yield support for this driver, to ensure
that it does not cause scheduling blackouts when used with large inputs.

Given that GCM executes at 1-2 cycles per byte and operates on 64 byte
chunks, doing a yield check every iteration should limit the scheduling
(or softirq) latency to < 200 cycles, which is a very conservative upper
bound.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/ghash-ce-core.S | 115 ++++++++++++--------
 1 file changed, 67 insertions(+), 48 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 7868330dd54e..533cfb810232 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -435,14 +435,23 @@ SYM_FUNC_END(pmull_ghash_update_p8)
 
 	.align		6
 	.macro		pmull_gcm_do_crypt, enc
-	stp		x29, x30, [sp, #-32]!
-	mov		x29, sp
-	str		x19, [sp, #24]
+	frame_push	8
 
-	load_round_keys	x7, x6, x8
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+	mov		x25, x6
+	mov		x26, x7
 
-	ld1		{SHASH.2d}, [x3], #16
-	ld1		{HH.2d-HH4.2d}, [x3]
+.Lrestart\enc:
+	load_round_keys	x26, x25, x8
+
+	add		x0, x22, #16
+	ld1		{SHASH.2d}, [x22]
+	ld1		{HH.2d-HH4.2d}, [x0]
 
 	trn1		SHASH2.2d, SHASH.2d, HH.2d
 	trn2		T1.2d, SHASH.2d, HH.2d
@@ -452,23 +461,23 @@ SYM_FUNC_END(pmull_ghash_update_p8)
 	trn2		T1.2d, HH3.2d, HH4.2d
 	eor		HH34.16b, HH34.16b, T1.16b
 
-	ld1		{XL.2d}, [x4]
+	ld1		{XL.2d}, [x23]
 
-	cbz		x0, 3f				// tag only?
+	cbz		x19, 3f				// tag only?
 
-	ldr		w8, [x5, #12]			// load lower counter
+	ldr		w8, [x24, #12]			// load lower counter
 CPU_LE(	rev		w8, w8		)
 
 0:	mov		w9, #4				// max blocks per round
-	add		x10, x0, #0xf
+	add		x10, x19, #0xf
 	lsr		x10, x10, #4			// remaining blocks
 
-	subs		x0, x0, #64
+	subs		x19, x19, #64
 	csel		w9, w10, w9, mi
 	add		w8, w8, w9
 
 	bmi		1f
-	ld1		{INP0.16b-INP3.16b}, [x2], #64
+	ld1		{INP0.16b-INP3.16b}, [x21], #64
 	.subsection	1
 	/*
 	 * Populate the four input registers right to left with up to 63 bytes
@@ -487,30 +496,30 @@ CPU_LE(	rev		w8, w8		)
 	 * input size is < 16 bytes)
 	 */
 1:	mov		x15, #16
-	ands		x19, x0, #0xf
-	csel		x19, x19, x15, ne
+	ands		x0, x19, #0xf
+	csel		x0, x0, x15, ne
 	adr_l		x17, .Lpermute_table + 16
 
-	sub		x11, x15, x19
+	sub		x11, x15, x0
 	add		x12, x17, x11
 	sub		x17, x17, x11
 	ld1		{T1.16b}, [x12]
-	sub		x10, x1, x11
-	sub		x11, x2, x11
+	sub		x10, x20, x11
+	sub		x11, x21, x11
 
-	cmp		x0, #-16
+	cmp		x19, #-16
 	csel		x14, x15, xzr, gt
-	cmp		x0, #-32
+	cmp		x19, #-32
 	csel		x15, x15, xzr, gt
-	cmp		x0, #-48
-	csel		x16, x19, xzr, gt
-	csel		x1, x1, x10, gt
-	csel		x2, x2, x11, gt
-
-	ld1		{INP0.16b}, [x2], x14
-	ld1		{INP1.16b}, [x2], x15
-	ld1		{INP2.16b}, [x2], x16
-	ld1		{INP3.16b}, [x2]
+	cmp		x19, #-48
+	csel		x16, x0, xzr, gt
+	csel		x20, x20, x10, gt
+	csel		x21, x21, x11, gt
+
+	ld1		{INP0.16b}, [x21], x14
+	ld1		{INP1.16b}, [x21], x15
+	ld1		{INP2.16b}, [x21], x16
+	ld1		{INP3.16b}, [x21]
 	tbl		INP3.16b, {INP3.16b}, T1.16b
 	b		2f
 	.previous
@@ -521,14 +530,24 @@ CPU_LE(	rev		w8, w8		)
 
 	bl		pmull_gcm_enc_4x
 
-	tbnz		x0, #63, 6f
-	st1		{INP0.16b-INP3.16b}, [x1], #64
+	tbnz		x19, #63, 6f
+	st1		{INP0.16b-INP3.16b}, [x20], #64
 	.if		\enc == 1
 	bl		pmull_gcm_ghash_4x
 	.endif
-	bne		0b
 
-3:	ldp		x19, x10, [sp, #24]
+	beq		3f
+
+	if_will_cond_yield_neon
+CPU_LE(	rev		w8, w8		)
+	str		w8, [x24, #12]			// store lower counter
+	st1		{XL.2d}, [x23]
+	do_cond_yield_neon
+	b		.Lrestart\enc
+	endif_yield_neon
+	b		0b
+
+3:	ldr		x10, [sp, #.Lframe_local_offset]
 	cbz		x10, 5f				// output tag?
 
 	ld1		{INP3.16b}, [x10]		// load lengths[]
@@ -536,10 +555,10 @@ CPU_LE(	rev		w8, w8		)
 	bl		pmull_gcm_ghash_4x
 
 	mov		w11, #(0x1 << 24)		// BE '1U'
-	ld1		{KS0.16b}, [x5]
+	ld1		{KS0.16b}, [x24]
 	mov		KS0.s[3], w11
 
-	enc_block	KS0, x7, x6, x12
+	enc_block	KS0, x26, x25, x12
 
 	ext		XL.16b, XL.16b, XL.16b, #8
 	rev64		XL.16b, XL.16b
@@ -548,7 +567,7 @@ CPU_LE(	rev		w8, w8		)
 	.if		\enc == 1
 	st1		{XL.16b}, [x10]			// store tag
 	.else
-	ldp		x11, x12, [sp, #40]		// load tag pointer and authsize
+	ldp		x11, x12, [sp, #.Lframe_local_offset + 8] // load tag pointer and authsize
 	adr_l		x17, .Lpermute_table
 	ld1		{KS0.16b}, [x11]		// load supplied tag
 	add		x17, x17, x12
@@ -561,33 +580,33 @@ CPU_LE(	rev		w8, w8		)
 	smov		w0, v0.b[0]			// return b0
 	.endif
 
-4:	ldp		x29, x30, [sp], #32
+4:	frame_pop
 	ret
 
 5:
 CPU_LE(	rev		w8, w8		)
-	str		w8, [x5, #12]			// store lower counter
-	st1		{XL.2d}, [x4]
+	str		w8, [x24, #12]			// store lower counter
+	st1		{XL.2d}, [x23]
 	b		4b
 
 6:	ld1		{T1.16b-T2.16b}, [x17], #32	// permute vectors
-	sub		x17, x17, x19, lsl #1
+	sub		x17, x17, x0, lsl #1
 
 	cmp		w9, #1
 	beq		7f
 	.subsection	1
-7:	ld1		{INP2.16b}, [x1]
+7:	ld1		{INP2.16b}, [x20]
 	tbx		INP2.16b, {INP3.16b}, T1.16b
 	mov		INP3.16b, INP2.16b
 	b		8f
 	.previous
 
-	st1		{INP0.16b}, [x1], x14
-	st1		{INP1.16b}, [x1], x15
-	st1		{INP2.16b}, [x1], x16
+	st1		{INP0.16b}, [x20], x14
+	st1		{INP1.16b}, [x20], x15
+	st1		{INP2.16b}, [x20], x16
 	tbl		INP3.16b, {INP3.16b}, T1.16b
 	tbx		INP3.16b, {INP2.16b}, T2.16b
-8:	st1		{INP3.16b}, [x1]
+8:	st1		{INP3.16b}, [x20]
 
 	.if		\enc == 1
 	ld1		{T1.16b}, [x17]
@@ -699,7 +718,7 @@ SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
 SYM_FUNC_END(pmull_gcm_ghash_4x)
 
 SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
-	ld1		{KS0.16b}, [x5]			// load upper counter
+	ld1		{KS0.16b}, [x24]		// load upper counter
 	sub		w10, w8, #4
 	sub		w11, w8, #3
 	sub		w12, w8, #2
@@ -716,13 +735,13 @@ SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
 	ins		KS2.s[3], w12
 	ins		KS3.s[3], w13
 
-	add		x10, x6, #96			// round key pointer
+	add		x10, x25, #96			// round key pointer
 	ld1		{K6.4s-K7.4s}, [x10], #32
 	.irp		key, K0, K1, K2, K3, K4, K5
 	enc_qround	KS0, KS1, KS2, KS3, \key
 	.endr
 
-	tbnz		x7, #2, .Lnot128
+	tbnz		x26, #2, .Lnot128
 	.subsection	1
 .Lnot128:
 	ld1		{K8.4s-K9.4s}, [x10], #32
@@ -733,7 +752,7 @@ SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
 	.irp		key, K8, K9
 	enc_qround	KS0, KS1, KS2, KS3, \key
 	.endr
-	tbz		x7, #1, .Lout192
+	tbz		x26, #1, .Lout192
 	b		.Lout256
 	.previous
 
-- 
2.17.1


WARNING: multiple messages have this Message-ID (diff)
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	Peter Zijlstra <peterz@infradead.org>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	linux-kernel@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	Eric Biggers <ebiggers@kernel.org>,
	Mark Brown <broonie@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Will Deacon <will@kernel.org>, Dave Martin <dave.martin@arm.com>,
	linux-arm-kernel@lists.infradead.org
Subject: [RFC PATCH 3/5] crypto: arm64/gcm-aes-ce - add NEON yield support
Date: Fri, 18 Dec 2020 18:01:04 +0100	[thread overview]
Message-ID: <20201218170106.23280-4-ardb@kernel.org> (raw)
In-Reply-To: <20201218170106.23280-1-ardb@kernel.org>

GCM mode is typically used for IPsec, but is now also used for software
kTLS, which means that we may end up operating on much larger inputs.

So let's wire up conditional NEON yield support for this driver, to ensure
that it does not cause scheduling blackouts when used with large inputs.

Given that GCM executes at 1-2 cycles per byte and operates on 64 byte
chunks, doing a yield check every iteration should limit the scheduling
(or softirq) latency to < 200 cycles, which is a very conservative upper
bound.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/crypto/ghash-ce-core.S | 115 ++++++++++++--------
 1 file changed, 67 insertions(+), 48 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 7868330dd54e..533cfb810232 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -435,14 +435,23 @@ SYM_FUNC_END(pmull_ghash_update_p8)
 
 	.align		6
 	.macro		pmull_gcm_do_crypt, enc
-	stp		x29, x30, [sp, #-32]!
-	mov		x29, sp
-	str		x19, [sp, #24]
+	frame_push	8
 
-	load_round_keys	x7, x6, x8
+	mov		x19, x0
+	mov		x20, x1
+	mov		x21, x2
+	mov		x22, x3
+	mov		x23, x4
+	mov		x24, x5
+	mov		x25, x6
+	mov		x26, x7
 
-	ld1		{SHASH.2d}, [x3], #16
-	ld1		{HH.2d-HH4.2d}, [x3]
+.Lrestart\enc:
+	load_round_keys	x26, x25, x8
+
+	add		x0, x22, #16
+	ld1		{SHASH.2d}, [x22]
+	ld1		{HH.2d-HH4.2d}, [x0]
 
 	trn1		SHASH2.2d, SHASH.2d, HH.2d
 	trn2		T1.2d, SHASH.2d, HH.2d
@@ -452,23 +461,23 @@ SYM_FUNC_END(pmull_ghash_update_p8)
 	trn2		T1.2d, HH3.2d, HH4.2d
 	eor		HH34.16b, HH34.16b, T1.16b
 
-	ld1		{XL.2d}, [x4]
+	ld1		{XL.2d}, [x23]
 
-	cbz		x0, 3f				// tag only?
+	cbz		x19, 3f				// tag only?
 
-	ldr		w8, [x5, #12]			// load lower counter
+	ldr		w8, [x24, #12]			// load lower counter
 CPU_LE(	rev		w8, w8		)
 
 0:	mov		w9, #4				// max blocks per round
-	add		x10, x0, #0xf
+	add		x10, x19, #0xf
 	lsr		x10, x10, #4			// remaining blocks
 
-	subs		x0, x0, #64
+	subs		x19, x19, #64
 	csel		w9, w10, w9, mi
 	add		w8, w8, w9
 
 	bmi		1f
-	ld1		{INP0.16b-INP3.16b}, [x2], #64
+	ld1		{INP0.16b-INP3.16b}, [x21], #64
 	.subsection	1
 	/*
 	 * Populate the four input registers right to left with up to 63 bytes
@@ -487,30 +496,30 @@ CPU_LE(	rev		w8, w8		)
 	 * input size is < 16 bytes)
 	 */
 1:	mov		x15, #16
-	ands		x19, x0, #0xf
-	csel		x19, x19, x15, ne
+	ands		x0, x19, #0xf
+	csel		x0, x0, x15, ne
 	adr_l		x17, .Lpermute_table + 16
 
-	sub		x11, x15, x19
+	sub		x11, x15, x0
 	add		x12, x17, x11
 	sub		x17, x17, x11
 	ld1		{T1.16b}, [x12]
-	sub		x10, x1, x11
-	sub		x11, x2, x11
+	sub		x10, x20, x11
+	sub		x11, x21, x11
 
-	cmp		x0, #-16
+	cmp		x19, #-16
 	csel		x14, x15, xzr, gt
-	cmp		x0, #-32
+	cmp		x19, #-32
 	csel		x15, x15, xzr, gt
-	cmp		x0, #-48
-	csel		x16, x19, xzr, gt
-	csel		x1, x1, x10, gt
-	csel		x2, x2, x11, gt
-
-	ld1		{INP0.16b}, [x2], x14
-	ld1		{INP1.16b}, [x2], x15
-	ld1		{INP2.16b}, [x2], x16
-	ld1		{INP3.16b}, [x2]
+	cmp		x19, #-48
+	csel		x16, x0, xzr, gt
+	csel		x20, x20, x10, gt
+	csel		x21, x21, x11, gt
+
+	ld1		{INP0.16b}, [x21], x14
+	ld1		{INP1.16b}, [x21], x15
+	ld1		{INP2.16b}, [x21], x16
+	ld1		{INP3.16b}, [x21]
 	tbl		INP3.16b, {INP3.16b}, T1.16b
 	b		2f
 	.previous
@@ -521,14 +530,24 @@ CPU_LE(	rev		w8, w8		)
 
 	bl		pmull_gcm_enc_4x
 
-	tbnz		x0, #63, 6f
-	st1		{INP0.16b-INP3.16b}, [x1], #64
+	tbnz		x19, #63, 6f
+	st1		{INP0.16b-INP3.16b}, [x20], #64
 	.if		\enc == 1
 	bl		pmull_gcm_ghash_4x
 	.endif
-	bne		0b
 
-3:	ldp		x19, x10, [sp, #24]
+	beq		3f
+
+	if_will_cond_yield_neon
+CPU_LE(	rev		w8, w8		)
+	str		w8, [x24, #12]			// store lower counter
+	st1		{XL.2d}, [x23]
+	do_cond_yield_neon
+	b		.Lrestart\enc
+	endif_yield_neon
+	b		0b
+
+3:	ldr		x10, [sp, #.Lframe_local_offset]
 	cbz		x10, 5f				// output tag?
 
 	ld1		{INP3.16b}, [x10]		// load lengths[]
@@ -536,10 +555,10 @@ CPU_LE(	rev		w8, w8		)
 	bl		pmull_gcm_ghash_4x
 
 	mov		w11, #(0x1 << 24)		// BE '1U'
-	ld1		{KS0.16b}, [x5]
+	ld1		{KS0.16b}, [x24]
 	mov		KS0.s[3], w11
 
-	enc_block	KS0, x7, x6, x12
+	enc_block	KS0, x26, x25, x12
 
 	ext		XL.16b, XL.16b, XL.16b, #8
 	rev64		XL.16b, XL.16b
@@ -548,7 +567,7 @@ CPU_LE(	rev		w8, w8		)
 	.if		\enc == 1
 	st1		{XL.16b}, [x10]			// store tag
 	.else
-	ldp		x11, x12, [sp, #40]		// load tag pointer and authsize
+	ldp		x11, x12, [sp, #.Lframe_local_offset + 8] // load tag pointer and authsize
 	adr_l		x17, .Lpermute_table
 	ld1		{KS0.16b}, [x11]		// load supplied tag
 	add		x17, x17, x12
@@ -561,33 +580,33 @@ CPU_LE(	rev		w8, w8		)
 	smov		w0, v0.b[0]			// return b0
 	.endif
 
-4:	ldp		x29, x30, [sp], #32
+4:	frame_pop
 	ret
 
 5:
 CPU_LE(	rev		w8, w8		)
-	str		w8, [x5, #12]			// store lower counter
-	st1		{XL.2d}, [x4]
+	str		w8, [x24, #12]			// store lower counter
+	st1		{XL.2d}, [x23]
 	b		4b
 
 6:	ld1		{T1.16b-T2.16b}, [x17], #32	// permute vectors
-	sub		x17, x17, x19, lsl #1
+	sub		x17, x17, x0, lsl #1
 
 	cmp		w9, #1
 	beq		7f
 	.subsection	1
-7:	ld1		{INP2.16b}, [x1]
+7:	ld1		{INP2.16b}, [x20]
 	tbx		INP2.16b, {INP3.16b}, T1.16b
 	mov		INP3.16b, INP2.16b
 	b		8f
 	.previous
 
-	st1		{INP0.16b}, [x1], x14
-	st1		{INP1.16b}, [x1], x15
-	st1		{INP2.16b}, [x1], x16
+	st1		{INP0.16b}, [x20], x14
+	st1		{INP1.16b}, [x20], x15
+	st1		{INP2.16b}, [x20], x16
 	tbl		INP3.16b, {INP3.16b}, T1.16b
 	tbx		INP3.16b, {INP2.16b}, T2.16b
-8:	st1		{INP3.16b}, [x1]
+8:	st1		{INP3.16b}, [x20]
 
 	.if		\enc == 1
 	ld1		{T1.16b}, [x17]
@@ -699,7 +718,7 @@ SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
 SYM_FUNC_END(pmull_gcm_ghash_4x)
 
 SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
-	ld1		{KS0.16b}, [x5]			// load upper counter
+	ld1		{KS0.16b}, [x24]		// load upper counter
 	sub		w10, w8, #4
 	sub		w11, w8, #3
 	sub		w12, w8, #2
@@ -716,13 +735,13 @@ SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
 	ins		KS2.s[3], w12
 	ins		KS3.s[3], w13
 
-	add		x10, x6, #96			// round key pointer
+	add		x10, x25, #96			// round key pointer
 	ld1		{K6.4s-K7.4s}, [x10], #32
 	.irp		key, K0, K1, K2, K3, K4, K5
 	enc_qround	KS0, KS1, KS2, KS3, \key
 	.endr
 
-	tbnz		x7, #2, .Lnot128
+	tbnz		x26, #2, .Lnot128
 	.subsection	1
 .Lnot128:
 	ld1		{K8.4s-K9.4s}, [x10], #32
@@ -733,7 +752,7 @@ SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
 	.irp		key, K8, K9
 	enc_qround	KS0, KS1, KS2, KS3, \key
 	.endr
-	tbz		x7, #1, .Lout192
+	tbz		x26, #1, .Lout192
 	b		.Lout256
 	.previous
 
-- 
2.17.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2020-12-18 17:04 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-18 17:01 [RFC PATCH 0/5] running kernel mode SIMD with softirqs disabled Ard Biesheuvel
2020-12-18 17:01 ` Ard Biesheuvel
2020-12-18 17:01 ` [RFC PATCH 1/5] crypto: aead - disallow en/decrypt for non-task or non-softirq context Ard Biesheuvel
2020-12-18 17:01   ` Ard Biesheuvel
2020-12-18 17:01 ` [RFC PATCH 2/5] crypto: skcipher " Ard Biesheuvel
2020-12-18 17:01   ` Ard Biesheuvel
2020-12-18 17:01 ` Ard Biesheuvel [this message]
2020-12-18 17:01   ` [RFC PATCH 3/5] crypto: arm64/gcm-aes-ce - add NEON yield support Ard Biesheuvel
2020-12-18 17:01 ` [RFC PATCH 4/5] arm64: fpsimd: run kernel mode NEON with softirqs disabled Ard Biesheuvel
2020-12-18 17:01   ` Ard Biesheuvel
2021-01-19 16:00   ` Dave Martin
2021-01-19 16:00     ` Dave Martin
2021-01-19 16:29     ` Ard Biesheuvel
2021-01-19 16:29       ` Ard Biesheuvel
2021-01-20 15:44       ` Dave Martin
2021-01-20 15:44         ` Dave Martin
2021-02-15 18:30         ` Ard Biesheuvel
2021-02-15 18:30           ` Ard Biesheuvel
2020-12-18 17:01 ` [RFC PATCH 5/5] crypto: arm64/gcm-aes-ce - remove non-SIMD fallback path Ard Biesheuvel
2020-12-18 17:01   ` Ard Biesheuvel
2020-12-19  2:04 ` [RFC PATCH 0/5] running kernel mode SIMD with softirqs disabled Herbert Xu
2020-12-19  2:04   ` Herbert Xu
2021-01-14  8:22   ` Ard Biesheuvel
2021-01-14  8:22     ` Ard Biesheuvel
2021-02-16 10:09 ` Peter Zijlstra
2021-02-16 10:09   ` Peter Zijlstra
2021-02-16 10:35   ` Ard Biesheuvel
2021-02-16 10:35     ` Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201218170106.23280-4-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=bigeasy@linutronix.de \
    --cc=broonie@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=dave.martin@arm.com \
    --cc=ebiggers@kernel.org \
    --cc=herbert@gondor.apana.org.au \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.