All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mark Hemment <markhemm@googlemail.com>
To: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	 Andrew Morton <akpm@linux-foundation.org>,
	 the arch/x86 maintainers <x86@kernel.org>,
	 Peter Zijlstra <peterz@infradead.org>,
	patrice.chotard@foss.st.com,
	 Mikulas Patocka <mpatocka@redhat.com>,
	markhemm@googlemail.com,  Lukas Czerner <lczerner@redhat.com>,
	Christoph Hellwig <hch@lst.de>,
	 "Darrick J. Wong" <djwong@kernel.org>,
	 Chuck Lever <chuck.lever@oracle.com>,
	Hugh Dickins <hughd@google.com>,
	 patches@lists.linux.dev, Linux-MM <linux-mm@kvack.org>,
	 mm-commits@vger.kernel.org
Subject: Re: [patch 02/14] tmpfs: fix regressions from wider use of ZERO_PAGE
Date: Sat, 16 Apr 2022 15:07:47 +0100 (BST)	[thread overview]
Message-ID: <29b9ef95-1226-73b4-b4d1-6e8d164fb17d@gmail.com> (raw)
In-Reply-To: <Ylpj9of+CP4ipDtm@zn.tnic>


On Sat, 16 Apr 2022, Borislav Petkov wrote:

> On Fri, Apr 15, 2022 at 03:10:51PM -0700, Linus Torvalds wrote:
> > Adding PeterZ and Borislav (who seem to be the last ones to have
> > worked on the copy and clear_page stuff respectively) and the x86
> > maintainers in case somebody gets the urge to just fix this.
> 
> I guess if enough people ask and keep asking, some people at least try
> to move...
> 
> > Because memory clearing should be faster than copying, and the thing
> > that makes copying fast is that FSRM and ERMS logic (the whole
> > "manually unrolled copy" is hopefully mostly a thing of the past and
> > we can consider it legacy)
> 
> So I did give it a look and it seems to me, if we want to do the
> alternatives thing here, it will have to look something like
> arch/x86/lib/copy_user_64.S.
> 
> I.e., the current __clear_user() will have to become the "handle_tail"
> thing there which deals with uncopied rest-bytes at the end and the new
> fsrm/erms/rep_good variants will then be alternative_call_2 or _3.
> 
> The fsrm thing will have only the handle_tail thing at the end when size
> != 0.
> 
> The others - erms and rep_good - will have to check for sizes smaller
> than, say a cacheline, and for those call the handle_tail thing directly
> instead of going into a REP loop. The current __clear_user() is still a
> lot better than that copy_user_generic_unrolled() abomination. And it's
> not like old CPUs would get any perf penalty - they'll simply use the
> same code.
> 
> And then you need the labels for _ASM_EXTABLE_UA() exception handling.
> 
> Anyway, something along those lines.
> 
> And then we'll need to benchmark this on a bunch of current machines to
> make sure there's no funny surprises, perf-wise.
> 
> I can get cracking on this but I would advise people not to hold their
> breaths. :)
> 
> Unless someone has a better idea or is itching to get hands dirty
> her-/himself.

I've done a skeleton implementation of alternative __clear_user() based on 
CPU features.
It has three versions of __clear_user();
o clear_user_original() - similar to the 'standard' __clear_user()
o clear_user_rep_good() - using 'rep stosq' + 'rep stosb' when CPU has 'rep_good'
o clear_user_erms() - using 'rep stosb' when CPU has 'erms'

Not claiming the implementation is ideal, but might be a useful starting 
point for someone.
Patch is against 5.18.0-rc2.
Only basic sanity testing done.

Simple performance testing done for large sizes, on a system (Intel E8400) 
which has rep_good but not erms;
# dd if=/dev/zero of=/dev/null bs=16384 count=10000
o *_original() - ~14.2GB/s.  Same as the 'standard' __clear_user().
o *_rep_good() - same throughput as *_original().
o *_erms()     - ~12.2GB/s (slower, as expected on a system without erms).

No performance testing done for zeroing small sizes.

Cheers,
Mark

Signed-off-by: Mark Hemment <markhemm@googlemail.com>
---
 arch/x86/include/asm/asm.h        |  39 +++++++++++++++
 arch/x86/include/asm/uaccess_64.h |  36 ++++++++++++++
 arch/x86/lib/clear_page_64.S      | 100 ++++++++++++++++++++++++++++++++++++++
 arch/x86/lib/usercopy_64.c        |  32 ------------
 4 files changed, 175 insertions(+), 32 deletions(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index fbcfec4dc4cc..373ed6be7a8d 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -132,6 +132,35 @@
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 
+# define UNDEFINE_EXTABLE_TYPE_REG \
+	.purgem extable_type_reg ;
+/* extable_type_reg emits ".long type + (regnr << 8)" for the named register. */
+# define DEFINE_EXTABLE_TYPE_REG \
+	.macro extable_type_reg type:req reg:req ;			\
+	.set .Lfound, 0	;		/* count of name matches */	\
+	.set .Lregnr, 0 ;		/* index in the list below */	\
+	.irp rs,rax,rcx,rdx,rbx,rsp,rbp,rsi,rdi,r8,r9,r10,r11,r12,r13,	\
+	     r14,r15 ;							\
+	.ifc \reg, %\rs ;						\
+	.set .Lfound, .Lfound+1 ;					\
+	.long \type + (.Lregnr << 8) ;					\
+	.endif ;							\
+	.set .Lregnr, .Lregnr+1 ;					\
+	.endr ;								\
+	.set .Lregnr, 0 ;		/* restart for 32-bit names */	\
+	.irp rs,eax,ecx,edx,ebx,esp,ebp,esi,edi,r8d,r9d,r10d,r11d,r12d, \
+	     r13d,r14d,r15d ;						\
+	.ifc \reg, %\rs ;						\
+	.set .Lfound, .Lfound+1 ;					\
+	.long \type + (.Lregnr << 8) ;					\
+	.endif ;							\
+	.set .Lregnr, .Lregnr+1 ;					\
+	.endr ;								\
+	.if (.Lfound != 1) ;		/* must match exactly once */	\
+	.error "extable_type_reg: bad register argument" ;		\
+	.endif ;							\
+	.endm ;
+
 # define _ASM_EXTABLE_TYPE(from, to, type)			\
 	.pushsection "__ex_table","a" ;				\
 	.balign 4 ;						\
@@ -140,6 +169,16 @@
 	.long type ;						\
 	.popsection
 
+# define _ASM_EXTABLE_TYPE_REG(from, to, type1, reg1)		\
+	.pushsection "__ex_table","a" ;	/* append one entry */	\
+	.balign 4 ;						\
+	.long (from) - . ;	/* 'from', relative to here */	\
+	.long (to) - . ;	/* 'to', relative to here */	\
+	DEFINE_EXTABLE_TYPE_REG	/* macro exists only here */	\
+	extable_type_reg reg=reg1, type=type1 ;	/* data word */	\
+	UNDEFINE_EXTABLE_TYPE_REG				\
+	.popsection
+
 # ifdef CONFIG_KPROBES
 #  define _ASM_NOKPROBE(entry)					\
 	.pushsection "_kprobe_blacklist","aw" ;			\
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 45697e04d771..6a4995e4cfae 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -79,4 +79,40 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
 	kasan_check_write(dst, size);
 	return __copy_user_flushcache(dst, src, size);
 }
+
+/*
+ * Zero Userspace.
+ */
+
+__must_check unsigned long
+clear_user_original(void __user *addr, unsigned long len);
+__must_check unsigned long
+clear_user_rep_good(void __user *addr, unsigned long len);
+__must_check unsigned long
+clear_user_erms(void __user *addr, unsigned long len);
+
+static __always_inline __must_check unsigned long
+___clear_user(void __user *addr, unsigned long len)
+{
+	unsigned long	ret;
+
+	/*
+	 * Call clear_user_original(), or its rep_good/erms alternative: addr
+	 * in rdi, len in rcx; rax returns the bytes left uncleared (0 = ok).
+	 * No memory constraint: it doesn't change memory gcc knows about.
+	 */
+
+	might_fault();
+	alternative_call_2(
+		clear_user_original,
+		clear_user_rep_good,
+		X86_FEATURE_REP_GOOD,
+		clear_user_erms,
+		X86_FEATURE_ERMS,
+		ASM_OUTPUT2("=a" (ret), "=D" (addr), "=c" (len)),
+		"1" (addr), "2" (len)		/* tied to outputs 1 and 2 */
+		: "%rdx", "cc");		/* callee may clobber rdx, flags */
+	return ret;
+}
+
+#define __clear_user(d, n)	___clear_user(d, n)
 #endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index fe59b8ac4fcc..abe1f44ea422 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,5 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 #include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/smap.h>
 #include <asm/export.h>
 
 /*
@@ -50,3 +52,101 @@ SYM_FUNC_START(clear_page_erms)
 	RET
 SYM_FUNC_END(clear_page_erms)
 EXPORT_SYMBOL_GPL(clear_page_erms)
+
+/*
+ * Default clear user-space: qword store loop, then a byte loop for the tail.
+ * Input:
+ * rdi destination (user address)
+ * rcx count in bytes
+ *
+ * Output:
+ * rax number of bytes not cleared, or 0 if successful.
+ */
+
+SYM_FUNC_START(clear_user_original)
+	ASM_STAC
+	movq %rcx,%rax
+	shrq $3,%rcx			/* rcx = count / 8 (whole qwords) */
+	andq $7,%rax			/* rax = count % 8 (tail bytes) */
+	testq %rcx,%rcx
+	jz 1f				/* no whole qword to clear */
+
+	.p2align 4
+0:	movq $0,(%rdi)			/* zero 8 bytes per iteration */
+	leaq 8(%rdi),%rdi
+	decq %rcx
+	jnz   0b
+
+1:	movq %rax,%rcx			/* rcx = tail byte count */
+	testq %rcx,%rcx
+	jz 3f
+
+2:	movb $0,(%rdi)			/* zero the tail one byte at a time */
+	incq %rdi
+	decl %ecx
+	jnz  2b
+
+3:	ASM_CLAC
+	movq %rcx,%rax			/* return whatever is left in rcx */
+	RET
+	/* NOTE(review): LEN8 fixup presumably sets rcx = 8 * rcx + reg; confirm. */
+	_ASM_EXTABLE_TYPE_REG(0b, 3b, EX_TYPE_UCOPY_LEN8, %rax)	/* qword loop fault -> 3b */
+	_ASM_EXTABLE_UA(2b, 3b)		/* byte loop fault -> 3b */
+SYM_FUNC_END(clear_user_original)
+EXPORT_SYMBOL(clear_user_original)
+
+/*
+ * Alternative clear user-space for CPUs with X86_FEATURE_REP_GOOD: 'rep stosq'
+ * for the whole qwords, then 'rep stosb' for the tail bytes.  Uses rdx.
+ * Input:
+ * rdi destination (user address)
+ * rcx count in bytes
+ *
+ * Output:
+ * rax number of bytes not cleared, or 0 if successful.
+ */
+
+SYM_FUNC_START(clear_user_rep_good)
+	ASM_STAC
+	movq %rcx,%rdx
+	xorq %rax,%rax			/* value stored by stos: zero */
+	shrq $3,%rcx			/* rcx = count / 8 (whole qwords) */
+	andq $7,%rdx			/* rdx = count % 8 (tail bytes) */
+
+0:	rep stosq			/* zero rcx qwords at (%rdi) */
+	movq %rdx,%rcx			/* rcx = tail byte count */
+
+1:	rep stosb			/* zero the tail bytes */
+
+3:	ASM_CLAC
+	movq %rcx,%rax			/* return whatever is left in rcx */
+	RET
+	/* NOTE(review): LEN8 fixup presumably sets rcx = 8 * rcx + reg; confirm. */
+	_ASM_EXTABLE_TYPE_REG(0b, 3b, EX_TYPE_UCOPY_LEN8, %rdx)	/* stosq fault -> 3b */
+	_ASM_EXTABLE_UA(1b, 3b)		/* stosb fault -> 3b */
+SYM_FUNC_END(clear_user_rep_good)
+EXPORT_SYMBOL(clear_user_rep_good)
+
+/*
+ * Alternative clear user-space for X86_FEATURE_ERMS CPUs: one 'rep stosb'.
+ * Input:
+ * rdi destination (user address)
+ * rcx count in bytes
+ *
+ * Output:
+ * rax number of bytes not cleared, or 0 if successful.
+ */
+
+SYM_FUNC_START(clear_user_erms)
+	xorq %rax,%rax			/* value stored by stos: zero */
+	ASM_STAC
+
+0:	rep stosb			/* zero rcx bytes at (%rdi) */
+
+3:	ASM_CLAC
+	movq %rcx,%rax			/* return whatever is left in rcx */
+	RET
+
+	_ASM_EXTABLE_UA(0b, 3b)		/* stosb fault -> 3b */
+SYM_FUNC_END(clear_user_erms)
+EXPORT_SYMBOL(clear_user_erms)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0402a749f3a0..3a2872c9c4a9 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -14,38 +14,6 @@
  * Zero Userspace
  */
 
-unsigned long __clear_user(void __user *addr, unsigned long size)
-{
-	long __d0;
-	might_fault();
-	/* no memory constraint because it doesn't change any memory gcc knows
-	   about */
-	stac();
-	asm volatile(
-		"	testq  %[size8],%[size8]\n"
-		"	jz     4f\n"
-		"	.align 16\n"
-		"0:	movq $0,(%[dst])\n"
-		"	addq   $8,%[dst]\n"
-		"	decl %%ecx ; jnz   0b\n"
-		"4:	movq  %[size1],%%rcx\n"
-		"	testl %%ecx,%%ecx\n"
-		"	jz     2f\n"
-		"1:	movb   $0,(%[dst])\n"
-		"	incq   %[dst]\n"
-		"	decl %%ecx ; jnz  1b\n"
-		"2:\n"
-
-		_ASM_EXTABLE_TYPE_REG(0b, 2b, EX_TYPE_UCOPY_LEN8, %[size1])
-		_ASM_EXTABLE_UA(1b, 2b)
-
-		: [size8] "=&c"(size), [dst] "=&D" (__d0)
-		: [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr));
-	clac();
-	return size;
-}
-EXPORT_SYMBOL(__clear_user);
-
 unsigned long clear_user(void __user *to, unsigned long n)
 {
 	if (access_ok(to, n))

  reply	other threads:[~2022-04-16 14:08 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-15  2:12 incoming Andrew Morton
2022-04-15  2:13 ` [patch 01/14] MAINTAINERS: Broadcom internal lists aren't maintainers Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 02/14] tmpfs: fix regressions from wider use of ZERO_PAGE Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15 22:10   ` Linus Torvalds
2022-04-15 22:21     ` Matthew Wilcox
2022-04-15 22:41     ` Hugh Dickins
2022-04-16  6:36     ` Borislav Petkov
2022-04-16 14:07       ` Mark Hemment [this message]
2022-04-16 17:28         ` Borislav Petkov
2022-04-16 17:42           ` Linus Torvalds
2022-04-16 21:15             ` Borislav Petkov
2022-04-17 19:41               ` Borislav Petkov
2022-04-17 20:56                 ` Linus Torvalds
2022-04-18 10:15                   ` Borislav Petkov
2022-04-18 17:10                     ` Linus Torvalds
2022-04-19  9:17                       ` Borislav Petkov
2022-04-19 16:41                         ` Linus Torvalds
2022-04-19 17:48                           ` Borislav Petkov
2022-04-21 15:06                             ` Borislav Petkov
2022-04-21 16:50                               ` Linus Torvalds
2022-04-21 17:22                                 ` Linus Torvalds
2022-04-24 19:37                                   ` Borislav Petkov
2022-04-24 19:54                                     ` Linus Torvalds
2022-04-24 20:24                                       ` Linus Torvalds
2022-04-27  0:14                                       ` Borislav Petkov
2022-04-27  1:29                                         ` Linus Torvalds
2022-04-27 10:41                                           ` Borislav Petkov
2022-04-27 16:00                                             ` Linus Torvalds
2022-05-04 18:56                                               ` Borislav Petkov
2022-05-04 19:22                                                 ` Linus Torvalds
2022-05-04 20:18                                                   ` Borislav Petkov
2022-05-04 20:40                                                     ` Linus Torvalds
2022-05-04 21:01                                                       ` Borislav Petkov
2022-05-04 21:09                                                         ` Linus Torvalds
2022-05-10  9:31                                                           ` clear_user (was: [patch 02/14] tmpfs: fix regressions from wider use of ZERO_PAGE) Borislav Petkov
2022-05-10 17:17                                                             ` Linus Torvalds
2022-05-10 17:28                                                             ` Linus Torvalds
2022-05-10 18:10                                                               ` Borislav Petkov
2022-05-10 18:57                                                                 ` Borislav Petkov
2022-05-24 12:32                                                                   ` [PATCH] x86/clear_user: Make it faster Borislav Petkov
2022-05-24 16:51                                                                     ` Linus Torvalds
2022-05-24 17:30                                                                       ` Borislav Petkov
2022-05-25 12:11                                                                     ` Mark Hemment
2022-05-27 11:28                                                                       ` Borislav Petkov
2022-05-27 11:10                                                                     ` Ingo Molnar
2022-06-22 14:21                                                                     ` Borislav Petkov
2022-06-22 15:06                                                                       ` Linus Torvalds
2022-06-22 20:14                                                                         ` Borislav Petkov
2022-06-22 21:07                                                                           ` Linus Torvalds
2022-06-23  9:41                                                                             ` Borislav Petkov
2022-07-05 17:01                                                                               ` [PATCH -final] " Borislav Petkov
2022-07-06  9:24                                                                                 ` Alexey Dobriyan
2022-07-11 10:33                                                                                   ` Borislav Petkov
2022-07-12 12:32                                                                                     ` Alexey Dobriyan
2022-08-06 12:49                                                                                       ` Borislav Petkov
2022-08-18 10:44     ` [tip: x86/cpu] " tip-bot2 for Borislav Petkov
2022-04-15  2:13 ` [patch 03/14] mm/secretmem: fix panic when growing a memfd_secret Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 04/14] irq_work: use kasan_record_aux_stack_noalloc() record callstack Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 05/14] kasan: fix hw tags enablement when KUNIT tests are disabled Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 06/14] mm, kfence: support kmem_dump_obj() for KFENCE objects Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 07/14] mm, page_alloc: fix build_zonerefs_node() Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 08/14] mm: fix unexpected zeroed page mapping with zram swap Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 09/14] mm: compaction: fix compiler warning when CONFIG_COMPACTION=n Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 10/14] hugetlb: do not demote poisoned hugetlb pages Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 11/14] revert "fs/binfmt_elf: fix PT_LOAD p_align values for loaders" Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:13 ` [patch 12/14] revert "fs/binfmt_elf: use PT_LOAD p_align values for static PIE" Andrew Morton
2022-04-15  2:13   ` Andrew Morton
2022-04-15  2:14 ` [patch 13/14] mm/vmalloc: fix spinning drain_vmap_work after reading from /proc/vmcore Andrew Morton
2022-04-15  2:14   ` Andrew Morton
2022-04-15  2:14 ` [patch 14/14] mm: kmemleak: take a full lowmem check in kmemleak_*_phys() Andrew Morton
2022-04-15  2:14   ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=29b9ef95-1226-73b4-b4d1-6e8d164fb17d@gmail.com \
    --to=markhemm@googlemail.com \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=chuck.lever@oracle.com \
    --cc=djwong@kernel.org \
    --cc=hch@lst.de \
    --cc=hughd@google.com \
    --cc=lczerner@redhat.com \
    --cc=linux-mm@kvack.org \
    --cc=mm-commits@vger.kernel.org \
    --cc=mpatocka@redhat.com \
    --cc=patches@lists.linux.dev \
    --cc=patrice.chotard@foss.st.com \
    --cc=peterz@infradead.org \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.