* [PATCH 1/5] x86_64: use REP MOVSB in copy_page()
@ 2017-04-26 18:23 Alexey Dobriyan
  2017-04-26 18:28 ` [PATCH 2/5] x86_64: inline copy_page() at call site Alexey Dobriyan
  0 siblings, 1 reply; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:23 UTC (permalink / raw)
  To: x86, tglx, mingo, hpa; +Cc: linux-kernel

On my Broadwell-era Xeon, copying a page with REP MOVSB is ~7.8% faster
than with REP MOVSQ. Select the REP MOVSB variant of copy_page() at
runtime with alternatives.
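
A minimal userspace sketch of this kind of comparison (hypothetical
benchmark, x86-64 + GCC inline asm; the helper names and iteration count
are made up for illustration and this is not the measurement above):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <time.h>

	#define PAGE_SIZE	4096
	#define ITERS		(1 << 20)

	static void copy_movsq(void *to, const void *from)
	{
		unsigned long cnt = PAGE_SIZE / 8;

		/* Copy one page 8 bytes at a time; RDI/RSI/RCX advance. */
		asm volatile("rep movsq"
			     : "+D" (to), "+S" (from), "+c" (cnt)
			     : : "memory");
	}

	static void copy_movsb(void *to, const void *from)
	{
		unsigned long cnt = PAGE_SIZE;

		/* Copy one page a byte at a time; fast on CPUs with ERMS. */
		asm volatile("rep movsb"
			     : "+D" (to), "+S" (from), "+c" (cnt)
			     : : "memory");
	}

	static double bench(void (*copy)(void *, const void *),
			    void *to, void *from)
	{
		struct timespec a, b;

		clock_gettime(CLOCK_MONOTONIC, &a);
		for (int i = 0; i < ITERS; i++)
			copy(to, from);
		clock_gettime(CLOCK_MONOTONIC, &b);
		return (b.tv_sec - a.tv_sec) + (b.tv_nsec - a.tv_nsec) / 1e9;
	}

	int main(void)
	{
		void *src = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
		void *dst = aligned_alloc(PAGE_SIZE, PAGE_SIZE);

		if (!src || !dst)
			return 1;
		memset(src, 0xab, PAGE_SIZE);
		printf("rep movsq: %.3f s\n", bench(copy_movsq, dst, src));
		printf("rep movsb: %.3f s\n", bench(copy_movsb, dst, src));
		free(src);
		free(dst);
		return 0;
	}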

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 arch/x86/lib/copy_page_64.S |   10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -13,13 +13,21 @@
  */
 	ALIGN
 ENTRY(copy_page)
-	ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
+	ALTERNATIVE_2 "jmp copy_page_regs",	\
+		"", X86_FEATURE_REP_GOOD,	\
+		"jmp copy_page_rep_movsb", X86_FEATURE_ERMS
 	movl	$4096/8, %ecx
 	rep	movsq
 	ret
 ENDPROC(copy_page)
 EXPORT_SYMBOL(copy_page)
 
+ENTRY(copy_page_rep_movsb)
+	mov	$4096, %ecx
+	rep movsb
+	ret
+ENDPROC(copy_page_rep_movsb)
+
 ENTRY(copy_page_regs)
 	subq	$2*8,	%rsp
 	movq	%rbx,	(%rsp)


* [PATCH 2/5] x86_64: inline copy_page() at call site
  2017-04-26 18:23 [PATCH 1/5] x86_64: use REP MOVSB in copy_page() Alexey Dobriyan
@ 2017-04-26 18:28 ` Alexey Dobriyan
  2017-04-26 18:30   ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Alexey Dobriyan
  2017-04-28 21:04   ` [PATCH 2/5] x86_64: inline copy_page() at call site Borislav Petkov
  0 siblings, 2 replies; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:28 UTC (permalink / raw)
  To: x86, tglx, mingo, hpa; +Cc: linux-kernel

Avoid the unconditional branch at every copy_page() call by using
alternatives and calling the optimal variant directly.

Rename the individual versions so it is immediately clear which one
shows up in profiles, etc.

RBX and R12 are not listed as clobbers because the generic version
restores them and neither REP version touches them.
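
A rough userspace analogy of "resolve once, call the best variant
directly" (hypothetical code, not the kernel mechanism: alternatives
patch the call target in place, so there is not even an indirect call
left):

	#include <stdio.h>
	#include <string.h>

	#define PAGE_SIZE 4096

	/* Stand-in for the register-based copy (copy_page_mov below). */
	static void copy_page_generic(void *to, void *from)
	{
		memcpy(to, from, PAGE_SIZE);
	}

	static void copy_page_movsb(void *to, void *from)
	{
		unsigned long cnt = PAGE_SIZE;

		asm volatile("rep movsb"
			     : "+D" (to), "+S" (from), "+c" (cnt)
			     : : "memory");
	}

	/* Assumes CPUID leaf 7 exists; EBX bit 9 advertises ERMS. */
	static int cpu_has_erms(void)
	{
		unsigned int eax = 7, ebx, ecx = 0, edx;

		asm volatile("cpuid"
			     : "+a" (eax), "=b" (ebx), "+c" (ecx), "=d" (edx));
		return (ebx >> 9) & 1;
	}

	static void (*copy_page)(void *to, void *from);

	int main(void)
	{
		static char src[PAGE_SIZE], dst[PAGE_SIZE];

		/* Pick the implementation once, call it directly afterwards. */
		copy_page = cpu_has_erms() ? copy_page_movsb : copy_page_generic;

		memset(src, 0x5a, sizeof(src));
		copy_page(dst, src);
		printf("pages match: %d\n", !memcmp(src, dst, sizeof(src)));
		return 0;
	}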

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 arch/x86/include/asm/page_64.h |   16 ++++++++++++++--
 arch/x86/lib/copy_page_64.S    |   17 +++++++----------
 2 files changed, 21 insertions(+), 12 deletions(-)

--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -49,8 +49,20 @@ static inline void clear_page(void *page)
 			   : "memory", "rax", "rcx");
 }
 
-void copy_page(void *to, void *from);
-
+void copy_page_mov(void *to, void *from);
+void copy_page_rep_movsq(void *to, void *from);
+void copy_page_rep_movsb(void *to, void *from);
+static __always_inline void copy_page(void *to, void *from)
+{
+	alternative_call_2(
+		copy_page_mov,
+		copy_page_rep_movsq, X86_FEATURE_REP_GOOD,
+		copy_page_rep_movsb, X86_FEATURE_ERMS,
+		ASM_OUTPUT2("=D" (to), "=S" (from)),
+		"0" (to), "1" (from)
+		: "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "cc", "memory"
+	);
+}
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -1,8 +1,6 @@
 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
 
 #include <linux/linkage.h>
-#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
 #include <asm/export.h>
 
 /*
@@ -12,23 +10,21 @@
  * prefetch distance based on SMP/UP.
  */
 	ALIGN
-ENTRY(copy_page)
-	ALTERNATIVE_2 "jmp copy_page_regs",	\
-		"", X86_FEATURE_REP_GOOD,	\
-		"jmp copy_page_rep_movsb", X86_FEATURE_ERMS
+ENTRY(copy_page_rep_movsq)
 	movl	$4096/8, %ecx
 	rep	movsq
 	ret
-ENDPROC(copy_page)
-EXPORT_SYMBOL(copy_page)
+ENDPROC(copy_page_rep_movsq)
+EXPORT_SYMBOL(copy_page_rep_movsq)
 
 ENTRY(copy_page_rep_movsb)
 	mov	$4096, %ecx
 	rep movsb
 	ret
 ENDPROC(copy_page_rep_movsb)
+EXPORT_SYMBOL(copy_page_rep_movsb)
 
-ENTRY(copy_page_regs)
+ENTRY(copy_page_mov)
 	subq	$2*8,	%rsp
 	movq	%rbx,	(%rsp)
 	movq	%r12,	1*8(%rsp)
@@ -93,4 +89,5 @@ ENTRY(copy_page_regs)
 	movq	1*8(%rsp), %r12
 	addq	$2*8, %rsp
 	ret
-ENDPROC(copy_page_regs)
+ENDPROC(copy_page_mov)
+EXPORT_SYMBOL(copy_page_mov)


* [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants
  2017-04-26 18:28 ` [PATCH 2/5] x86_64: inline copy_page() at call site Alexey Dobriyan
@ 2017-04-26 18:30   ` Alexey Dobriyan
  2017-04-26 18:34     ` [PATCH 4/5] x86_64: clobber "cc" in inlined clear_page() Alexey Dobriyan
  2017-05-05 16:58     ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Borislav Petkov
  2017-04-28 21:04   ` [PATCH 2/5] x86_64: inline copy_page() at call site Borislav Petkov
  1 sibling, 2 replies; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:30 UTC (permalink / raw)
  To: x86, tglx, mingo, hpa; +Cc: linux-kernel

Change marketing-ish acronyms like ERMS and chatty names to consistent,
shorter versions:

	xxx_mov
	xxx_rep_stosq	xxx_rep_movsq
	xxx_rep_stosb	xxx_rep_movsb

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 arch/x86/include/asm/page_64.h    |   12 ++++++------
 arch/x86/include/asm/uaccess_64.h |   18 +++++++++---------
 arch/x86/lib/clear_page_64.S      |   18 +++++++++---------
 arch/x86/lib/copy_user_64.S       |   20 ++++++++++----------
 tools/perf/ui/browsers/annotate.c |    2 +-
 5 files changed, 35 insertions(+), 35 deletions(-)

--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -35,15 +35,15 @@ extern unsigned long __phys_addr_symbol(unsigned long);
 #define pfn_valid(pfn)          ((pfn) < max_pfn)
 #endif
 
-void clear_page_orig(void *page);
-void clear_page_rep(void *page);
-void clear_page_erms(void *page);
+void clear_page_mov(void *page);
+void clear_page_rep_stosq(void *page);
+void clear_page_rep_stosb(void *page);
 
 static inline void clear_page(void *page)
 {
-	alternative_call_2(clear_page_orig,
-			   clear_page_rep, X86_FEATURE_REP_GOOD,
-			   clear_page_erms, X86_FEATURE_ERMS,
+	alternative_call_2(clear_page_mov,
+			   clear_page_rep_stosq, X86_FEATURE_REP_GOOD,
+			   clear_page_rep_stosb, X86_FEATURE_ERMS,
 			   "=D" (page),
 			   "0" (page)
 			   : "memory", "rax", "rcx");
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -18,11 +18,11 @@
 
 /* Handles exceptions in both to and from, but doesn't do access_ok */
 __must_check unsigned long
-copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
+copy_user_rep_movsb(void *to, const void *from, unsigned len);
 __must_check unsigned long
-copy_user_generic_string(void *to, const void *from, unsigned len);
+copy_user_rep_movsq(void *to, const void *from, unsigned len);
 __must_check unsigned long
-copy_user_generic_unrolled(void *to, const void *from, unsigned len);
+copy_user_mov(void *to, const void *from, unsigned len);
 
 static __always_inline __must_check unsigned long
 copy_user_generic(void *to, const void *from, unsigned len)
@@ -30,14 +30,14 @@ copy_user_generic(void *to, const void *from, unsigned len)
 	unsigned ret;
 
 	/*
-	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
-	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
-	 * Otherwise, use copy_user_generic_unrolled.
+	 * If CPU has ERMS feature, use copy_user_rep_movsb.
+	 * Otherwise, if CPU has rep_good feature, use copy_user_rep_movsq.
+	 * Otherwise, use copy_user_mov.
 	 */
-	alternative_call_2(copy_user_generic_unrolled,
-			 copy_user_generic_string,
+	alternative_call_2(copy_user_mov,
+			 copy_user_rep_movsq,
 			 X86_FEATURE_REP_GOOD,
-			 copy_user_enhanced_fast_string,
+			 copy_user_rep_movsb,
 			 X86_FEATURE_ERMS,
 			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
 				     "=d" (len)),
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -14,15 +14,15 @@
  * Zero a page.
  * %rdi	- page
  */
-ENTRY(clear_page_rep)
+ENTRY(clear_page_rep_stosq)
 	movl $4096/8,%ecx
 	xorl %eax,%eax
 	rep stosq
 	ret
-ENDPROC(clear_page_rep)
-EXPORT_SYMBOL_GPL(clear_page_rep)
+ENDPROC(clear_page_rep_stosq)
+EXPORT_SYMBOL_GPL(clear_page_rep_stosq)
 
-ENTRY(clear_page_orig)
+ENTRY(clear_page_mov)
 	xorl   %eax,%eax
 	movl   $4096/64,%ecx
 	.p2align 4
@@ -41,13 +41,13 @@ ENTRY(clear_page_orig)
 	jnz	.Lloop
 	nop
 	ret
-ENDPROC(clear_page_orig)
-EXPORT_SYMBOL_GPL(clear_page_orig)
+ENDPROC(clear_page_mov)
+EXPORT_SYMBOL_GPL(clear_page_mov)
 
-ENTRY(clear_page_erms)
+ENTRY(clear_page_rep_stosb)
 	movl $4096,%ecx
 	xorl %eax,%eax
 	rep stosb
 	ret
-ENDPROC(clear_page_erms)
-EXPORT_SYMBOL_GPL(clear_page_erms)
+ENDPROC(clear_page_rep_stosb)
+EXPORT_SYMBOL_GPL(clear_page_rep_stosb)
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -17,7 +17,7 @@
 #include <asm/export.h>
 
 /*
- * copy_user_generic_unrolled - memory copy with exception handling.
+ * copy_user_mov - memory copy with exception handling.
  * This version is for CPUs like P4 that don't have efficient micro
  * code for rep movsq
  *
@@ -29,7 +29,7 @@
  * Output:
  * eax uncopied bytes or 0 if successful.
  */
-ENTRY(copy_user_generic_unrolled)
+ENTRY(copy_user_mov)
 	ASM_STAC
 	cmpl $8,%edx
 	jb 20f		/* less then 8 bytes, go to byte copy loop */
@@ -111,8 +111,8 @@ ENTRY(copy_user_generic_unrolled)
 	_ASM_EXTABLE(19b,40b)
 	_ASM_EXTABLE(21b,50b)
 	_ASM_EXTABLE(22b,50b)
-ENDPROC(copy_user_generic_unrolled)
-EXPORT_SYMBOL(copy_user_generic_unrolled)
+ENDPROC(copy_user_mov)
+EXPORT_SYMBOL(copy_user_mov)
 
 /* Some CPUs run faster using the string copy instructions.
  * This is also a lot simpler. Use them when possible.
@@ -132,7 +132,7 @@ EXPORT_SYMBOL(copy_user_generic_unrolled)
  * Output:
  * eax uncopied bytes or 0 if successful.
  */
-ENTRY(copy_user_generic_string)
+ENTRY(copy_user_rep_movsq)
 	ASM_STAC
 	cmpl $8,%edx
 	jb 2f		/* less than 8 bytes, go to byte copy loop */
@@ -157,8 +157,8 @@ ENTRY(copy_user_generic_string)
 
 	_ASM_EXTABLE(1b,11b)
 	_ASM_EXTABLE(3b,12b)
-ENDPROC(copy_user_generic_string)
-EXPORT_SYMBOL(copy_user_generic_string)
+ENDPROC(copy_user_rep_movsq)
+EXPORT_SYMBOL(copy_user_rep_movsq)
 
 /*
  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
@@ -172,7 +172,7 @@ EXPORT_SYMBOL(copy_user_generic_string)
  * Output:
  * eax uncopied bytes or 0 if successful.
  */
-ENTRY(copy_user_enhanced_fast_string)
+ENTRY(copy_user_rep_movsb)
 	ASM_STAC
 	movl %edx,%ecx
 1:	rep
@@ -187,8 +187,8 @@ ENTRY(copy_user_enhanced_fast_string)
 	.previous
 
 	_ASM_EXTABLE(1b,12b)
-ENDPROC(copy_user_enhanced_fast_string)
-EXPORT_SYMBOL(copy_user_enhanced_fast_string)
+ENDPROC(copy_user_rep_movsb)
+EXPORT_SYMBOL(copy_user_rep_movsb)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1084,7 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 			 * routines that comes with labels in the same column
 			 * as the address in objdump, sigh.
 			 *
-			 * E.g. copy_user_generic_unrolled
+			 * E.g. copy_user_mov
  			 */
 			if (pos->offset < (s64)size)
 				browser.offsets[pos->offset] = pos;


* [PATCH 4/5] x86_64: clobber "cc" in inlined clear_page()
  2017-04-26 18:30   ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Alexey Dobriyan
@ 2017-04-26 18:34     ` Alexey Dobriyan
  2017-04-26 18:35       ` [PATCH 5/5] x86_64: garbage collect headers in clear_page.S Alexey Dobriyan
  2017-05-05 16:58     ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Borislav Petkov
  1 sibling, 1 reply; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:34 UTC (permalink / raw)
  To: x86, tglx, mingo, hpa; +Cc: linux-kernel, bp

Both REP variants clobber flags because of their "xor eax, eax"
instructions. While those could be changed to "mov eax, 0", the generic
version probably cannot avoid the clobber because it has to do a
comparison at some point.

And avoiding the flags clobber is not really worth it anyway.
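
A small userspace sketch (x86-64 + GCC inline asm, hypothetical, only to
show why "cc" belongs in the clobber list): XOR rewrites the arithmetic
flags, MOV does not:

	#include <stdio.h>

	int main(void)
	{
		unsigned char cf_after_xor, cf_after_mov;

		/* Set CF, zero EAX with XOR, then capture CF: XOR cleared it. */
		asm volatile("stc\n\t"
			     "xor %%eax, %%eax\n\t"
			     "setc %0"
			     : "=r" (cf_after_xor) : : "eax", "cc");

		/* Set CF, zero EAX with MOV, then capture CF: MOV left it alone. */
		asm volatile("stc\n\t"
			     "mov $0, %%eax\n\t"
			     "setc %0"
			     : "=r" (cf_after_mov) : : "eax", "cc");

		printf("CF after xor: %u\n", cf_after_xor);	/* 0 */
		printf("CF after mov: %u\n", cf_after_mov);	/* 1 */
		return 0;
	}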

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 arch/x86/include/asm/page_64.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -46,7 +46,7 @@ static inline void clear_page(void *page)
 			   clear_page_rep_stosb, X86_FEATURE_ERMS,
 			   "=D" (page),
 			   "0" (page)
-			   : "memory", "rax", "rcx");
+			   : "cc", "memory", "rax", "rcx");
 }
 
 void copy_page_mov(void *to, void *from);


* [PATCH 5/5] x86_64: garbage collect headers in clear_page.S
  2017-04-26 18:34     ` [PATCH 4/5] x86_64: clobber "cc" in inlined clear_page() Alexey Dobriyan
@ 2017-04-26 18:35       ` Alexey Dobriyan
  0 siblings, 0 replies; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:35 UTC (permalink / raw)
  To: x86, tglx, mingo, hpa; +Cc: linux-kernel, bp

These includes have not been necessary since clear_page() was inlined:
the alternative is now selected at the call site, not in clear_page_64.S.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---

 arch/x86/lib/clear_page_64.S |    2 --
 1 file changed, 2 deletions(-)

--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,6 +1,4 @@
 #include <linux/linkage.h>
-#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
 #include <asm/export.h>
 
 /*


* Re: [PATCH 2/5] x86_64: inline copy_page() at call site
  2017-04-26 18:28 ` [PATCH 2/5] x86_64: inline copy_page() at call site Alexey Dobriyan
  2017-04-26 18:30   ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Alexey Dobriyan
@ 2017-04-28 21:04   ` Borislav Petkov
  2017-05-02 11:49     ` Alexey Dobriyan
  1 sibling, 1 reply; 9+ messages in thread
From: Borislav Petkov @ 2017-04-28 21:04 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: x86, tglx, mingo, hpa, linux-kernel

On Wed, Apr 26, 2017 at 09:28:06PM +0300, Alexey Dobriyan wrote:
> Avoid the unconditional branch at every copy_page() call by using
> alternatives and calling the optimal variant directly.
> 
> Rename the individual versions so it is immediately clear which one
> shows up in profiles, etc.
> 
> RBX and R12 are not listed as clobbers because the generic version
> restores them and neither REP version touches them.
> 
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
> 
>  arch/x86/include/asm/page_64.h |   16 ++++++++++++++--
>  arch/x86/lib/copy_page_64.S    |   17 +++++++----------
>  2 files changed, 21 insertions(+), 12 deletions(-)
> 
> --- a/arch/x86/include/asm/page_64.h
> +++ b/arch/x86/include/asm/page_64.h
> @@ -49,8 +49,20 @@ static inline void clear_page(void *page)
>  			   : "memory", "rax", "rcx");
>  }
>  
> -void copy_page(void *to, void *from);
> -
> +void copy_page_mov(void *to, void *from);
> +void copy_page_rep_movsq(void *to, void *from);
> +void copy_page_rep_movsb(void *to, void *from);

<---- newline here.

> +static __always_inline void copy_page(void *to, void *from)
> +{
> +	alternative_call_2(

Please align at the opening brace, like clear_page() above it:

	alternative_call_2(copy_page_mov,
			   copy_page_rep_movsq, X86_FEATURE_REP_GOOD,
			   ...


> +		copy_page_rep_movsb, X86_FEATURE_ERMS,
> +		ASM_OUTPUT2("=D" (to), "=S" (from)),
> +		"0" (to), "1" (from)
> +		: "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "cc", "memory"
> +	);
> +}
>  #endif	/* !__ASSEMBLY__ */
>  
>  #ifdef CONFIG_X86_VSYSCALL_EMULATION

...

>  ENTRY(copy_page_rep_movsb)
>  	mov	$4096, %ecx
>  	rep movsb
>  	ret
>  ENDPROC(copy_page_rep_movsb)
> +EXPORT_SYMBOL(copy_page_rep_movsb)
>  
> -ENTRY(copy_page_regs)
> +ENTRY(copy_page_mov)

copy_page_regs() is a better name IMO. copy_page_mov() doesn't tell me
anything - all three use "mov". copy_page_unrolled() sounds ok too.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


* Re: [PATCH 2/5] x86_64: inline copy_page() at call site
  2017-04-28 21:04   ` [PATCH 2/5] x86_64: inline copy_page() at call site Borislav Petkov
@ 2017-05-02 11:49     ` Alexey Dobriyan
  2017-05-02 11:59       ` Borislav Petkov
  0 siblings, 1 reply; 9+ messages in thread
From: Alexey Dobriyan @ 2017-05-02 11:49 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: the arch/x86 maintainers, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, Linux Kernel

On Sat, Apr 29, 2017 at 12:04 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Wed, Apr 26, 2017 at 09:28:06PM +0300, Alexey Dobriyan wrote:

>> +static __always_inline void copy_page(void *to, void *from)
>> +{
>> +     alternative_call_2(
>
> Please align at the opening brace, like clear_page() above it:

Then I'd have to split the clobber list and no lines would be saved.

>         alternative_call_2(copy_page_mov,
>                            copy_page_rep_movsq, X86_FEATURE_REP_GOOD,
>                            ...
>
>
>> +             copy_page_rep_movsb, X86_FEATURE_ERMS,
>> +             ASM_OUTPUT2("=D" (to), "=S" (from)),
>> +             "0" (to), "1" (from)
>> +             : "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "cc", "memory"
>> +     );
>> +}
>>  #endif       /* !__ASSEMBLY__ */
>>
>>  #ifdef CONFIG_X86_VSYSCALL_EMULATION
>
> ...
>
>>  ENTRY(copy_page_rep_movsb)
>>       mov     $4096, %ecx
>>       rep movsb
>>       ret
>>  ENDPROC(copy_page_rep_movsb)
>> +EXPORT_SYMBOL(copy_page_rep_movsb)
>>
>> -ENTRY(copy_page_regs)
>> +ENTRY(copy_page_mov)
>
> copy_page_regs() is a better name IMO. copy_page_mov() doesn't tell me
> anything - all three use "mov". copy_page_unrolled() sounds ok too.

It says unambiguously which instruction does the actual copying.


* Re: [PATCH 2/5] x86_64: inline copy_page() at call site
  2017-05-02 11:49     ` Alexey Dobriyan
@ 2017-05-02 11:59       ` Borislav Petkov
  0 siblings, 0 replies; 9+ messages in thread
From: Borislav Petkov @ 2017-05-02 11:59 UTC (permalink / raw)
  To: Alexey Dobriyan
  Cc: the arch/x86 maintainers, Thomas Gleixner, Ingo Molnar,
	H. Peter Anvin, Linux Kernel

On Tue, May 02, 2017 at 02:49:04PM +0300, Alexey Dobriyan wrote:
> It says unambiguously which instruction does the actual copying.

And that doesn't tell me that it is an unrolled copy using registers.
That is much more useful info when I look at stack traces than just
"mov".

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--


* Re: [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants
  2017-04-26 18:30   ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Alexey Dobriyan
  2017-04-26 18:34     ` [PATCH 4/5] x86_64: clobber "cc" in inlined clear_page() Alexey Dobriyan
@ 2017-05-05 16:58     ` Borislav Petkov
  1 sibling, 0 replies; 9+ messages in thread
From: Borislav Petkov @ 2017-05-05 16:58 UTC (permalink / raw)
  To: Alexey Dobriyan; +Cc: x86, tglx, mingo, hpa, linux-kernel

On Wed, Apr 26, 2017 at 09:30:47PM +0300, Alexey Dobriyan wrote:
> Change marketing-ish acronyms like ERMS and chatty names to consistent,
> shorter versions:
> 
> 	xxx_mov
> 	xxx_rep_stosq	xxx_rep_movsq
> 	xxx_rep_stosb	xxx_rep_movsb
> 
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
> 
>  arch/x86/include/asm/page_64.h    |   12 ++++++------
>  arch/x86/include/asm/uaccess_64.h |   18 +++++++++---------
>  arch/x86/lib/clear_page_64.S      |   18 +++++++++---------
>  arch/x86/lib/copy_user_64.S       |   20 ++++++++++----------
>  tools/perf/ui/browsers/annotate.c |    2 +-
>  5 files changed, 35 insertions(+), 35 deletions(-)
> 
> --- a/arch/x86/include/asm/page_64.h
> +++ b/arch/x86/include/asm/page_64.h
> @@ -35,15 +35,15 @@ extern unsigned long __phys_addr_symbol(unsigned long);
>  #define pfn_valid(pfn)          ((pfn) < max_pfn)
>  #endif
>  
> -void clear_page_orig(void *page);
> -void clear_page_rep(void *page);
> -void clear_page_erms(void *page);
> +void clear_page_mov(void *page);
> +void clear_page_rep_stosq(void *page);
> +void clear_page_rep_stosb(void *page);
>  
>  static inline void clear_page(void *page)
>  {
> -	alternative_call_2(clear_page_orig,
> -			   clear_page_rep, X86_FEATURE_REP_GOOD,
> -			   clear_page_erms, X86_FEATURE_ERMS,
> +	alternative_call_2(clear_page_mov,
> +			   clear_page_rep_stosq, X86_FEATURE_REP_GOOD,
> +			   clear_page_rep_stosb, X86_FEATURE_ERMS,
>  			   "=D" (page),
>  			   "0" (page)
>  			   : "memory", "rax", "rcx");
> --- a/arch/x86/include/asm/uaccess_64.h
> +++ b/arch/x86/include/asm/uaccess_64.h
> @@ -18,11 +18,11 @@
>  
>  /* Handles exceptions in both to and from, but doesn't do access_ok */
>  __must_check unsigned long
> -copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
> +copy_user_rep_movsb(void *to, const void *from, unsigned len);

WARNING: Prefer 'unsigned int' to bare use of 'unsigned'
#62: FILE: arch/x86/include/asm/uaccess_64.h:21:
+copy_user_rep_movsb(void *to, const void *from, unsigned len);

Pls convert them while at it.

>  __must_check unsigned long
> -copy_user_generic_string(void *to, const void *from, unsigned len);
> +copy_user_rep_movsq(void *to, const void *from, unsigned len);
>  __must_check unsigned long
> -copy_user_generic_unrolled(void *to, const void *from, unsigned len);
> +copy_user_mov(void *to, const void *from, unsigned len);
>  
>  static __always_inline __must_check unsigned long
>  copy_user_generic(void *to, const void *from, unsigned len)
> @@ -30,14 +30,14 @@ copy_user_generic(void *to, const void *from, unsigned len)
>  	unsigned ret;
>  
>  	/*
> -	 * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
> -	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
> -	 * Otherwise, use copy_user_generic_unrolled.
> +	 * If CPU has ERMS feature, use copy_user_rep_movsb.
> +	 * Otherwise, if CPU has rep_good feature, use copy_user_rep_movsq.

REP_GOOD, while you're at it. Also, end function names with ().

> +	 * Otherwise, use copy_user_mov.
>  	 */
> -	alternative_call_2(copy_user_generic_unrolled,
> -			 copy_user_generic_string,
> +	alternative_call_2(copy_user_mov,
> +			 copy_user_rep_movsq,
>  			 X86_FEATURE_REP_GOOD,
> -			 copy_user_enhanced_fast_string,
> +			 copy_user_rep_movsb,
>  			 X86_FEATURE_ERMS,
>  			 ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
>  				     "=d" (len)),
> --- a/arch/x86/lib/clear_page_64.S
> +++ b/arch/x86/lib/clear_page_64.S
> @@ -14,15 +14,15 @@
>   * Zero a page.
>   * %rdi	- page
>   */
> -ENTRY(clear_page_rep)
> +ENTRY(clear_page_rep_stosq)
>  	movl $4096/8,%ecx
>  	xorl %eax,%eax
>  	rep stosq
>  	ret
> -ENDPROC(clear_page_rep)
> -EXPORT_SYMBOL_GPL(clear_page_rep)
> +ENDPROC(clear_page_rep_stosq)
> +EXPORT_SYMBOL_GPL(clear_page_rep_stosq)
>  
> -ENTRY(clear_page_orig)
> +ENTRY(clear_page_mov)
>  	xorl   %eax,%eax
>  	movl   $4096/64,%ecx
>  	.p2align 4
> @@ -41,13 +41,13 @@ ENTRY(clear_page_orig)
>  	jnz	.Lloop
>  	nop
>  	ret
> -ENDPROC(clear_page_orig)
> -EXPORT_SYMBOL_GPL(clear_page_orig)
> +ENDPROC(clear_page_mov)
> +EXPORT_SYMBOL_GPL(clear_page_mov)

Same issue as with the previous patch: _orig was dumb but since you're
changing the names, pls change them to something more descriptive.

> -ENTRY(clear_page_erms)
> +ENTRY(clear_page_rep_stosb)
>  	movl $4096,%ecx
>  	xorl %eax,%eax
>  	rep stosb
>  	ret
> -ENDPROC(clear_page_erms)
> -EXPORT_SYMBOL_GPL(clear_page_erms)
> +ENDPROC(clear_page_rep_stosb)
> +EXPORT_SYMBOL_GPL(clear_page_rep_stosb)
> --- a/arch/x86/lib/copy_user_64.S
> +++ b/arch/x86/lib/copy_user_64.S
> @@ -17,7 +17,7 @@
>  #include <asm/export.h>
>  
>  /*
> - * copy_user_generic_unrolled - memory copy with exception handling.
> + * copy_user_mov - memory copy with exception handling.

This rename is actually losing information from the function name:
"generic_unrolled" explains exactly what the function does.

>   * This version is for CPUs like P4 that don't have efficient micro
>   * code for rep movsq
>   *

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.
