Subject: [MODERATED] Re: [PATCH v2 5/8] MDSv2 7
From: Andrew Cooper
Date: Tue, 11 Dec 2018 00:33:20 +0000
To: speck@linutronix.de
In-Reply-To: <86f7d711109a095f09a0a9cfd9ca5efeba04cb23.1544464266.git.ak@linux.intel.com>
References: <86f7d711109a095f09a0a9cfd9ca5efeba04cb23.1544464266.git.ak@linux.intel.com>

On 10/12/2018 17:53, speck for Andi Kleen wrote:
> xdiff --git a/arch/x86/lib/clear_cpu.S b/arch/x86/lib/clear_cpu.S
> new file mode 100644
> index 000000000000..5af33baf5427
> --- /dev/null
> +++ b/arch/x86/lib/clear_cpu.S
> @@ -0,0 +1,107 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include
> +#include
> +#include
> +
> +/*
> + * Clear internal CPU buffers on kernel boundaries.
> + *
> + * These sequences are somewhat fragile, please don't add
> + * or change instructions in the middle of the areas marked with
> + * start/end.
> + *
> + * Interrupts and NMIs we deal with by reclearing. We clear parts
> + * of the kernel stack, which has other advantages too.
> + *
> + * Save all registers to make it easier to use for callers.
> + *
> + * This sequence is for Nehalem-IvyBridge. For Haswell we jump
> + * to hsw_clear_buf.
> + *
> + * These functions need to be called on a full stack, as they may
> + * use up to 1.5k of stack.
> + * They should also be called with
> + * interrupts disabled. NMIs etc. are handled by letting every
> + * NMI do its own clear sequence.
> + */
> +ENTRY(ivb_clear_cpu)
> +GLOBAL(do_clear_cpu)
> +    /*
> +     * obj[tf]ool complains about unreachable code here,
> +     * which appears to be spurious?!?
> +     */
> +    ALTERNATIVE "", "jmp hsw_clear_cpu", X86_BUG_MDS_CLEAR_CPU_HSW
> +    push %__ASM_REG(si)
> +    push %__ASM_REG(di)
> +    push %__ASM_REG(cx)
> +    mov %_ASM_SP, %__ASM_REG(si)
> +    sub $2*16, %_ASM_SP
> +    and $-16,%_ASM_SP
> +    movdqa %xmm0, (%_ASM_SP)
> +    movdqa %xmm1, 1*16(%_ASM_SP)

You don't need to preserve %xmm1 here.  It is unmodified by the
sequence, because the orpd pulls zero out of its memory operand.
Similarly...

> +    sub $672, %_ASM_SP
> +    xorpd %xmm0,%xmm0
> +    movdqa %xmm0, (%_ASM_SP)
> +    movdqa %xmm0, 16(%_ASM_SP)

... this store doesn't appear to do anything useful, as that stack
slot isn't read again, and...

> +    mov %_ASM_SP, %__ASM_REG(di)
> +    /* Clear sequence start */
> +    movdqu %xmm0,(%__ASM_REG(di))
> +    lfence
> +    orpd (%__ASM_REG(di)), %xmm0
> +    orpd (%__ASM_REG(di)), %xmm1
> +    mfence
> +    movl $40, %ecx
> +    add $32, %__ASM_REG(di)

... I know this was in the recommended sequence, but bytes 16-31
aren't used at all, and it feels fishy.  Either this wants to be $16,
or the second orpd wants a displacement of 16 (and we do need to
retain the second zeroing write) so all 32 bytes are used.

> +1:  movntdq %xmm0, (%__ASM_REG(di))
> +    add $16, %__ASM_REG(di)
> +    decl %ecx
> +    jnz 1b
> +    mfence
> +    /* Clear sequence end */
> +    add $672, %_ASM_SP
> +    movdqu (%_ASM_SP), %xmm0
> +    movdqu 1*16(%_ASM_SP), %xmm1
> +    mov %__ASM_REG(si),%_ASM_SP
> +    pop %__ASM_REG(cx)
> +    pop %__ASM_REG(di)
> +    pop %__ASM_REG(si)
> +    ret
> +END(ivb_clear_cpu)
> +
> +/*
> + * Version for Haswell/Broadwell.
> + */
> +ENTRY(hsw_clear_cpu)
> +    push %__ASM_REG(si)
> +    push %__ASM_REG(di)
> +    push %__ASM_REG(cx)
> +    push %__ASM_REG(ax)
> +    mov %_ASM_SP, %__ASM_REG(ax)
> +    sub $16, %_ASM_SP
> +    and $-16,%_ASM_SP
> +    movdqa %xmm0, (%_ASM_SP)
> +    /* Clear sequence start */
> +    xorpd %xmm0,%xmm0
> +    sub $1536,%_ASM_SP
> +    mov %_ASM_SP, %__ASM_REG(si)
> +    mov %__ASM_REG(si), %__ASM_REG(di)

Strictly speaking, these 3 instructions aren't part of the clear
sequence.  They are just for setting up the buffer.  In particular,
"sub $1536, %rsp" is unbalanced WRT its matching add.

~Andrew

> +    movl $40,%ecx
> +1:  movntdq %xmm0, (%__ASM_REG(di))
> +    add $16, %__ASM_REG(di)
> +    decl %ecx
> +    jnz 1b
> +    mfence
> +    mov %__ASM_REG(si), %__ASM_REG(di)
> +    mov $1536, %ecx
> +    rep movsb
> +    lfence
> +    /* Clear sequence end */
> +    add $1536,%_ASM_SP
> +    movdqa (%_ASM_SP), %xmm0
> +    mov %__ASM_REG(ax),%_ASM_SP
> +    pop %__ASM_REG(ax)
> +    pop %__ASM_REG(cx)
> +    pop %__ASM_REG(di)
> +    pop %__ASM_REG(si)
> +    ret
> +END(hsw_clear_cpu)