linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@kernel.org>
To: "Luck, Tony" <tony.luck@intel.com>
Cc: linux-kernel@vger.kernel.org,
	Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>, Borislav Petkov <bp@alien8.de>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: Re: [PATCH v12] x86, mce: Add memcpy_trap()
Date: Fri, 19 Feb 2016 10:10:13 +0100	[thread overview]
Message-ID: <20160219091013.GA8665@gmail.com> (raw)
In-Reply-To: <20160218211410.GA18341@agluck-desk.sc.intel.com>


* Luck, Tony <tony.luck@intel.com> wrote:

> Make use of the EXTABLE_FAULT exception table entries. This routine
> returns a structure to indicate the result of the copy:
> 
> struct mcsafe_ret {
>         u64 trap_nr;
>         u64 bytes_left;
> };
> 
> If the copy is successful, then both 'trap_nr' and 'bytes_left' are zero.
> 
> If we faulted during the copy, then 'trap_nr' will say which type
> of trap (X86_TRAP_PF or X86_TRAP_MC) and 'bytes_left' says how many
> bytes were not copied.
> 
> Note that this is probably the first of several copy functions.
> We can make new ones for non-temporal cache handling etc.
> 
> Reviewed-by: Borislav Petkov <bp@suse.de>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> 
> V12 (part3 only - parts 1,2,4 are in tip tree)
> 
> Ingo:	More meaningful names for fields of return structure
> PeterZ:	Better name for copy function: memcpy_trap()
> Ingo:	Don't use numeric labels in new asm code
> Ingo:	Consistent capitalization in comments.
> Ingo:	Periods between sentences in comments.
> 
> Not addressed: Linus' comment that perhaps we could try/catch
> syntactic sugar instead of returning multiple values in a structure.
> 
>  arch/x86/include/asm/string_64.h |  26 +++++++
>  arch/x86/kernel/x8664_ksyms_64.c |   2 +
>  arch/x86/lib/memcpy_64.S         | 158 +++++++++++++++++++++++++++++++++++++++
>  3 files changed, 186 insertions(+)
> 
> diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
> index ff8b9a17dc4b..65e5793b7590 100644
> --- a/arch/x86/include/asm/string_64.h
> +++ b/arch/x86/include/asm/string_64.h
> @@ -78,6 +78,32 @@ int strcmp(const char *cs, const char *ct);
>  #define memset(s, c, n) __memset(s, c, n)
>  #endif
>  
> +/**
> + * struct memcpy_trap_ret - return value from memcpy_trap()
> + *
> + * @trap_nr:	x86 trap number if the copy failed
> + * @bytes_left:	zero for successful copy else number of bytes not copied
> + */
> +struct memcpy_trap_ret {
> +	u64 trap_nr;
> +	u64 bytes_left;
> +};
> +
> +/**
> + * memcpy_trap - copy memory with indication if a trap interrupted the copy
> + *
> + * @dst:	destination address
> + * @src:	source address
> + * @cnt:	number of bytes to copy
> + *
> + * Low level memory copy function that catches traps and indicates whether
> + * the copy succeeded and if not, why it failed.
> + *
> + * Return is struct memcpy_trap_ret which provides both the number of bytes
> + * not copied and the reason for the failure.
> + */
> +struct memcpy_trap_ret memcpy_trap(void *dst, const void __user *src, size_t cnt);
> +
>  #endif /* __KERNEL__ */
>  
>  #endif /* _ASM_X86_STRING_64_H */
> diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
> index a0695be19864..40866e2cbcc4 100644
> --- a/arch/x86/kernel/x8664_ksyms_64.c
> +++ b/arch/x86/kernel/x8664_ksyms_64.c
> @@ -37,6 +37,8 @@ EXPORT_SYMBOL(__copy_user_nocache);
>  EXPORT_SYMBOL(_copy_from_user);
>  EXPORT_SYMBOL(_copy_to_user);
>  
> +EXPORT_SYMBOL_GPL(memcpy_trap);
> +
>  EXPORT_SYMBOL(copy_page);
>  EXPORT_SYMBOL(clear_page);
>  
> diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
> index 16698bba87de..aecdfc41c114 100644
> --- a/arch/x86/lib/memcpy_64.S
> +++ b/arch/x86/lib/memcpy_64.S
> @@ -177,3 +177,161 @@ ENTRY(memcpy_orig)
>  .Lend:
>  	retq
>  ENDPROC(memcpy_orig)
> +
> +#ifndef CONFIG_UML
> +/*
> + * memcpy_trap - memory copy with machine check exception handling
> + * Note that we only catch machine checks when reading the source addresses.
> + * Writes to target are posted and don't generate machine checks.
> + */
> +ENTRY(memcpy_trap)
> +	cmpl $8,%edx
> +	/* Less than 8 bytes? Go to byte copy loop */
> +	jb .L_no_whole_words
> +
> +	/* Check for bad alignment of source */
> +	testl $7,%esi
> +	/* Already aligned */
> +	jz .L_8byte_aligned
> +
> +	/* Copy one byte at a time until source is 8-byte aligned */
> +	movl %esi,%ecx
> +	andl $7,%ecx
> +	subl $8,%ecx
> +	negl %ecx
> +	subl %ecx,%edx
> +.L_copy_leading_bytes:
> +	movb (%rsi),%al
> +	movb %al,(%rdi)
> +	incq %rsi
> +	incq %rdi
> +	decl %ecx
> +	jnz .L_copy_leading_bytes
> +
> +.L_8byte_aligned:
> +	/* Figure out how many whole cache lines (64-bytes) to copy */
> +	movl %edx,%ecx
> +	andl $63,%edx
> +	shrl $6,%ecx
> +	jz .L_no_whole_cache_lines
> +
> +	/* Loop copying whole cache lines */
> +.L_cache_w0: movq (%rsi),%r8
> +.L_cache_w1: movq 1*8(%rsi),%r9
> +.L_cache_w2: movq 2*8(%rsi),%r10
> +.L_cache_w3: movq 3*8(%rsi),%r11
> +	movq %r8,(%rdi)
> +	movq %r9,1*8(%rdi)
> +	movq %r10,2*8(%rdi)
> +	movq %r11,3*8(%rdi)
> +.L_cache_w4: movq 4*8(%rsi),%r8
> +.L_cache_w5: movq 5*8(%rsi),%r9
> +.L_cache_w6: movq 6*8(%rsi),%r10
> +.L_cache_w7: movq 7*8(%rsi),%r11
> +	movq %r8,4*8(%rdi)
> +	movq %r9,5*8(%rdi)
> +	movq %r10,6*8(%rdi)
> +	movq %r11,7*8(%rdi)
> +	leaq 64(%rsi),%rsi
> +	leaq 64(%rdi),%rdi
> +	decl %ecx
> +	jnz .L_cache_w0
> +
> +	/* Are there any trailing 8-byte words? */
> +.L_no_whole_cache_lines:
> +	movl %edx,%ecx
> +	andl $7,%edx
> +	shrl $3,%ecx
> +	jz .L_no_whole_words
> +
> +	/* Copy trailing words */
> +.L_copy_trailing_words:
> +	movq (%rsi),%r8
> +	mov %r8,(%rdi)
> +	leaq 8(%rsi),%rsi
> +	leaq 8(%rdi),%rdi
> +	decl %ecx
> +	jnz .L_copy_trailing_words
> +
> +	/* Any trailing bytes? */
> +.L_no_whole_words:
> +	andl %edx,%edx
> +	jz .L_done_memcpy_trap
> +
> +	/* Copy trailing bytes */
> +	movl %edx,%ecx
> +.L_copy_trailing_bytes:
> +	movb (%rsi),%al
> +	movb %al,(%rdi)
> +	incq %rsi
> +	incq %rdi
> +	decl %ecx
> +	jnz .L_copy_trailing_bytes
> +
> +	/* Copy successful. Return .bytes_left = 0, .trap_nr = 0 */
> +.L_done_memcpy_trap:
> +	xorq %rax, %rax
> +	xorq %rdx, %rdx
> +	ret
> +
> +	.section .fixup,"ax"
> +	/*
> +	 * The machine check handler loaded %rax with trap number.
> +	 * We just need to make sure %edx has the number of
> +	 * bytes remaining.
> +	 */
> +.L_fix_leading_bytes:
> +	add %ecx,%edx
> +	ret
> +.L_fix_cache_w0:
> +	shl $6,%ecx
> +	add %ecx,%edx
> +	ret
> +.L_fix_cache_w1:
> +	shl $6,%ecx
> +	lea -8(%ecx,%edx),%edx
> +	ret
> +.L_fix_cache_w2:
> +	shl $6,%ecx
> +	lea -16(%ecx,%edx),%edx
> +	ret
> +.L_fix_cache_w3:
> +	shl $6,%ecx
> +	lea -24(%ecx,%edx),%edx
> +	ret
> +.L_fix_cache_w4:
> +	shl $6,%ecx
> +	lea -32(%ecx,%edx),%edx
> +	ret
> +.L_fix_cache_w5:
> +	shl $6,%ecx
> +	lea -40(%ecx,%edx),%edx
> +	ret
> +.L_fix_cache_w6:
> +	shl $6,%ecx
> +	lea -48(%ecx,%edx),%edx
> +	ret
> +.L_fix_cache_w7:
> +	shl $6,%ecx
> +	lea -56(%ecx,%edx),%edx
> +	ret
> +.L_fix_trailing_words:
> +	lea (%rdx,%rcx,8),%rdx
> +	ret
> +.L_fix_trailing_bytes:
> +	mov %ecx,%edx
> +	ret
> +	.previous
> +
> +	_ASM_EXTABLE_FAULT(.L_copy_leading_bytes,.L_fix_leading_bytes)
> +	_ASM_EXTABLE_FAULT(.L_cache_w0,.L_fix_cache_w0)
> +	_ASM_EXTABLE_FAULT(.L_cache_w1,.L_fix_cache_w1)
> +	_ASM_EXTABLE_FAULT(.L_cache_w2,.L_fix_cache_w2)
> +	_ASM_EXTABLE_FAULT(.L_cache_w3,.L_fix_cache_w3)
> +	_ASM_EXTABLE_FAULT(.L_cache_w4,.L_fix_cache_w4)
> +	_ASM_EXTABLE_FAULT(.L_cache_w5,.L_fix_cache_w5)
> +	_ASM_EXTABLE_FAULT(.L_cache_w6,.L_fix_cache_w6)
> +	_ASM_EXTABLE_FAULT(.L_cache_w7,.L_fix_cache_w7)
> +	_ASM_EXTABLE_FAULT(.L_copy_trailing_words,.L_fix_trailing_words)
> +	_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes,.L_fix_trailing_bytes)
> +#endif

Ok, I absolutely love this assembly code, it's already a lot easier to read than 
95% of the x86 assembly code we have today!

There are two minor things I've noticed:

1) please put a space between instruction operands, i.e.:

-	shl $6,%ecx
+	shl $6, %ecx

2)

There's a way to make the exception fixup stubs more readable, by aligning them 
vertically, via something like:

.L_fix_cache_w0:	shl $6, %ecx; add %ecx, %edx;		ret
.L_fix_cache_w1:	shl $6, %ecx; lea  -8(%ecx,%edx), %edx;	ret
.L_fix_cache_w2:	shl $6, %ecx; lea -16(%ecx,%edx), %edx;	ret
.L_fix_cache_w3:	shl $6, %ecx; lea -24(%ecx,%edx), %edx; ret
.L_fix_cache_w4:	shl $6, %ecx; lea -32(%ecx,%edx), %edx; ret
.L_fix_cache_w5:	shl $6, %ecx; lea -40(%ecx,%edx), %edx; ret
.L_fix_cache_w6:	shl $6, %ecx; lea -48(%ecx,%edx), %edx; ret
.L_fix_cache_w7:	shl $6, %ecx; lea -56(%ecx,%edx), %edx; ret

this also makes it a lot easier to check the correctness of the fixup stubs. Also 
this layout makes it clear that the first fixup stub uses 'ADD', while the others 
use LEA, etc.

Thanks,

	Ingo

  reply	other threads:[~2016-02-19  9:10 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-17 18:20 [PATCH v11 0/4] Machine check recovery when kernel accesses poison Tony Luck
2016-02-17 18:20 ` [PATCH v11 1/4] x86: Expand exception table to allow new handling options Tony Luck
2016-02-18 10:19   ` [tip:ras/core] x86/mm: Expand the exception table logic " tip-bot for Tony Luck
2016-02-17 18:20 ` [PATCH v11 4/4] x86: Create a new synthetic cpu capability for machine check recovery Tony Luck
2016-02-18 10:19   ` [tip:x86/asm] x86/cpufeature: " tip-bot for Tony Luck
2016-02-17 18:20 ` [PATCH v11 3/4] x86, mce: Add __mcsafe_copy() Tony Luck
2016-02-18  8:21   ` Ingo Molnar
2016-02-18  9:59     ` Peter Zijlstra
2016-02-18 10:19       ` Ingo Molnar
2016-02-18 10:29         ` Borislav Petkov
2016-02-18 10:35         ` Peter Zijlstra
2016-02-18 14:59           ` Luck, Tony
2016-02-19  7:58       ` Ingo Molnar
2016-02-19  8:43         ` Peter Zijlstra
2016-02-19  9:51           ` Ingo Molnar
2016-02-18 10:29     ` Borislav Petkov
2016-02-18 10:34       ` Ingo Molnar
2016-02-18 10:36         ` Borislav Petkov
2016-02-18 18:48           ` Ingo Molnar
2016-02-18 21:14     ` [PATCH v12] x86, mce: Add memcpy_trap() Luck, Tony
2016-02-19  9:10       ` Ingo Molnar [this message]
2016-02-19 17:53         ` [PATCH v13] " Luck, Tony
2016-02-24 17:38           ` Tony Luck
2016-02-24 18:35             ` Linus Torvalds
2016-02-24 19:27               ` Tony Luck
2016-02-24 19:37                 ` Linus Torvalds
2016-02-25  8:56                   ` Ingo Molnar
2016-02-25 19:33                     ` Luck, Tony
2016-02-25 20:39                       ` Linus Torvalds
2016-02-25 22:11                         ` Andy Lutomirski
2016-02-18 19:47                           ` [PATCH v14] x86, mce: Add memcpy_mcsafe() Tony Luck
2016-03-02 20:47                             ` Luck, Tony
2016-03-08 17:37                             ` [tip:ras/core] x86/mm, x86/mce: " tip-bot for Tony Luck
2016-03-10 19:26                             ` [PATCH v14] x86, mce: " Mika Penttilä
2016-03-10 19:37                               ` Luck, Tony
2016-03-11 22:10                                 ` Tony Luck
2016-03-11 22:14                                   ` Dan Williams
2016-03-12 17:16                                   ` Ingo Molnar
2016-03-13  1:12                                     ` Linus Torvalds
2016-03-13  9:25                                       ` Ingo Molnar
2016-03-14 22:33                                         ` [PATCH] x86/mm, x86/mce: Fix return type/value for memcpy_mcsafe() Tony Luck
2016-03-16  8:06                                           ` [tip:x86/urgent] " tip-bot for Tony Luck
2016-02-26  0:58                           ` [PATCH v13] x86, mce: Add memcpy_trap() Linus Torvalds
2016-02-26  1:19                             ` Andy Lutomirski
2016-02-26  2:38                               ` Linus Torvalds
2016-02-18 18:12   ` [PATCH v11 3/4] x86, mce: Add __mcsafe_copy() Linus Torvalds
2016-02-18 18:51     ` Ingo Molnar
2016-02-18 18:52     ` Luck, Tony
2016-02-18 20:14       ` Ingo Molnar
2016-02-18 21:33         ` Dan Williams
2016-02-17 18:20 ` [PATCH v11 2/4] x86, mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries Tony Luck
2016-02-18 10:19   ` [tip:ras/core] x86/mce: " tip-bot for Tony Luck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160219091013.GA8665@gmail.com \
    --to=mingo@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).