* [PATCH 1/5] x86_64: use REP MOVSB in copy_page()
@ 2017-04-26 18:23 Alexey Dobriyan
2017-04-26 18:28 ` [PATCH 2/5] x86_64: inline copy_page() at call site Alexey Dobriyan
0 siblings, 1 reply; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:23 UTC (permalink / raw)
To: x86, tglx, mingo, hpa; +Cc: linux-kernel
On my Broadwell-era Xeon, copying a page with REP MOVSB is ~7.8% faster
than with REP MOVSQ. Choose the REP MOVSB variant of copy_page() at
runtime with alternatives.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
arch/x86/lib/copy_page_64.S | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -13,13 +13,21 @@
*/
ALIGN
ENTRY(copy_page)
- ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
+ ALTERNATIVE_2 "jmp copy_page_regs", \
+ "", X86_FEATURE_REP_GOOD, \
+ "jmp copy_page_rep_movsb", X86_FEATURE_ERMS
movl $4096/8, %ecx
rep movsq
ret
ENDPROC(copy_page)
EXPORT_SYMBOL(copy_page)
+ENTRY(copy_page_rep_movsb)
+ mov $4096, %ecx
+ rep movsb
+ ret
+ENDPROC(copy_page_rep_movsb)
+
ENTRY(copy_page_regs)
subq $2*8, %rsp
movq %rbx, (%rsp)
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/5] x86_64: inline copy_page() at call site
2017-04-26 18:23 [PATCH 1/5] x86_64: use REP MOVSB in copy_page() Alexey Dobriyan
@ 2017-04-26 18:28 ` Alexey Dobriyan
2017-04-26 18:30 ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Alexey Dobriyan
2017-04-28 21:04 ` [PATCH 2/5] x86_64: inline copy_page() at call site Borislav Petkov
0 siblings, 2 replies; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:28 UTC (permalink / raw)
To: x86, tglx, mingo, hpa; +Cc: linux-kernel
Avoid unconditional branch at every copy_page() call by using
alternatives and calling optimal variant directly.
Rename individual versions to immediately show which one is used in
profiles, etc.
RBX and R12 aren't clobbered because generic version restores them
and both REP versions don't touch them.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
arch/x86/include/asm/page_64.h | 16 ++++++++++++++--
arch/x86/lib/copy_page_64.S | 17 +++++++----------
2 files changed, 21 insertions(+), 12 deletions(-)
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -49,8 +49,20 @@ static inline void clear_page(void *page)
: "memory", "rax", "rcx");
}
-void copy_page(void *to, void *from);
-
+void copy_page_mov(void *to, void *from);
+void copy_page_rep_movsq(void *to, void *from);
+void copy_page_rep_movsb(void *to, void *from);
+static __always_inline void copy_page(void *to, void *from)
+{
+ alternative_call_2(
+ copy_page_mov,
+ copy_page_rep_movsq, X86_FEATURE_REP_GOOD,
+ copy_page_rep_movsb, X86_FEATURE_ERMS,
+ ASM_OUTPUT2("=D" (to), "=S" (from)),
+ "0" (to), "1" (from)
+ : "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "cc", "memory"
+ );
+}
#endif /* !__ASSEMBLY__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -1,8 +1,6 @@
/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
#include <linux/linkage.h>
-#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
#include <asm/export.h>
/*
@@ -12,23 +10,21 @@
* prefetch distance based on SMP/UP.
*/
ALIGN
-ENTRY(copy_page)
- ALTERNATIVE_2 "jmp copy_page_regs", \
- "", X86_FEATURE_REP_GOOD, \
- "jmp copy_page_rep_movsb", X86_FEATURE_ERMS
+ENTRY(copy_page_rep_movsq)
movl $4096/8, %ecx
rep movsq
ret
-ENDPROC(copy_page)
-EXPORT_SYMBOL(copy_page)
+ENDPROC(copy_page_rep_movsq)
+EXPORT_SYMBOL(copy_page_rep_movsq)
ENTRY(copy_page_rep_movsb)
mov $4096, %ecx
rep movsb
ret
ENDPROC(copy_page_rep_movsb)
+EXPORT_SYMBOL(copy_page_rep_movsb)
-ENTRY(copy_page_regs)
+ENTRY(copy_page_mov)
subq $2*8, %rsp
movq %rbx, (%rsp)
movq %r12, 1*8(%rsp)
@@ -93,4 +89,5 @@ ENTRY(copy_page_regs)
movq 1*8(%rsp), %r12
addq $2*8, %rsp
ret
-ENDPROC(copy_page_regs)
+ENDPROC(copy_page_mov)
+EXPORT_SYMBOL(copy_page_mov)
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants
2017-04-26 18:28 ` [PATCH 2/5] x86_64: inline copy_page() at call site Alexey Dobriyan
@ 2017-04-26 18:30 ` Alexey Dobriyan
2017-04-26 18:34 ` [PATCH 4/5] x86_64: clobber "cc" in inlined clear_page() Alexey Dobriyan
2017-05-05 16:58 ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Borislav Petkov
2017-04-28 21:04 ` [PATCH 2/5] x86_64: inline copy_page() at call site Borislav Petkov
1 sibling, 2 replies; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:30 UTC (permalink / raw)
To: x86, tglx, mingo, hpa; +Cc: linux-kernel
Patch changes market-ish acronyms like ERMS and chatty names
to consistent and shorter versions:
xxx_mov
xxx_rep_stosq xxx_rep_movsq
xxx_rep_stosb xxx_rep_movsb
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
arch/x86/include/asm/page_64.h | 12 ++++++------
arch/x86/include/asm/uaccess_64.h | 18 +++++++++---------
arch/x86/lib/clear_page_64.S | 18 +++++++++---------
arch/x86/lib/copy_user_64.S | 20 ++++++++++----------
tools/perf/ui/browsers/annotate.c | 2 +-
5 files changed, 35 insertions(+), 35 deletions(-)
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -35,15 +35,15 @@ extern unsigned long __phys_addr_symbol(unsigned long);
#define pfn_valid(pfn) ((pfn) < max_pfn)
#endif
-void clear_page_orig(void *page);
-void clear_page_rep(void *page);
-void clear_page_erms(void *page);
+void clear_page_mov(void *page);
+void clear_page_rep_stosq(void *page);
+void clear_page_rep_stosb(void *page);
static inline void clear_page(void *page)
{
- alternative_call_2(clear_page_orig,
- clear_page_rep, X86_FEATURE_REP_GOOD,
- clear_page_erms, X86_FEATURE_ERMS,
+ alternative_call_2(clear_page_mov,
+ clear_page_rep_stosq, X86_FEATURE_REP_GOOD,
+ clear_page_rep_stosb, X86_FEATURE_ERMS,
"=D" (page),
"0" (page)
: "memory", "rax", "rcx");
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -18,11 +18,11 @@
/* Handles exceptions in both to and from, but doesn't do access_ok */
__must_check unsigned long
-copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
+copy_user_rep_movsb(void *to, const void *from, unsigned len);
__must_check unsigned long
-copy_user_generic_string(void *to, const void *from, unsigned len);
+copy_user_rep_movsq(void *to, const void *from, unsigned len);
__must_check unsigned long
-copy_user_generic_unrolled(void *to, const void *from, unsigned len);
+copy_user_mov(void *to, const void *from, unsigned len);
static __always_inline __must_check unsigned long
copy_user_generic(void *to, const void *from, unsigned len)
@@ -30,14 +30,14 @@ copy_user_generic(void *to, const void *from, unsigned len)
unsigned ret;
/*
- * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
- * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
- * Otherwise, use copy_user_generic_unrolled.
+ * If CPU has ERMS feature, use copy_user_rep_movsb.
+ * Otherwise, if CPU has rep_good feature, use copy_user_rep_movsq.
+ * Otherwise, use copy_user_mov.
*/
- alternative_call_2(copy_user_generic_unrolled,
- copy_user_generic_string,
+ alternative_call_2(copy_user_mov,
+ copy_user_rep_movsq,
X86_FEATURE_REP_GOOD,
- copy_user_enhanced_fast_string,
+ copy_user_rep_movsb,
X86_FEATURE_ERMS,
ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
"=d" (len)),
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -14,15 +14,15 @@
* Zero a page.
* %rdi - page
*/
-ENTRY(clear_page_rep)
+ENTRY(clear_page_rep_stosq)
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
-ENDPROC(clear_page_rep)
-EXPORT_SYMBOL_GPL(clear_page_rep)
+ENDPROC(clear_page_rep_stosq)
+EXPORT_SYMBOL_GPL(clear_page_rep_stosq)
-ENTRY(clear_page_orig)
+ENTRY(clear_page_mov)
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -41,13 +41,13 @@ ENTRY(clear_page_orig)
jnz .Lloop
nop
ret
-ENDPROC(clear_page_orig)
-EXPORT_SYMBOL_GPL(clear_page_orig)
+ENDPROC(clear_page_mov)
+EXPORT_SYMBOL_GPL(clear_page_mov)
-ENTRY(clear_page_erms)
+ENTRY(clear_page_rep_stosb)
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
-ENDPROC(clear_page_erms)
-EXPORT_SYMBOL_GPL(clear_page_erms)
+ENDPROC(clear_page_rep_stosb)
+EXPORT_SYMBOL_GPL(clear_page_rep_stosb)
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -17,7 +17,7 @@
#include <asm/export.h>
/*
- * copy_user_generic_unrolled - memory copy with exception handling.
+ * copy_user_mov - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
* code for rep movsq
*
@@ -29,7 +29,7 @@
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ENTRY(copy_user_generic_unrolled)
+ENTRY(copy_user_mov)
ASM_STAC
cmpl $8,%edx
jb 20f /* less then 8 bytes, go to byte copy loop */
@@ -111,8 +111,8 @@ ENTRY(copy_user_generic_unrolled)
_ASM_EXTABLE(19b,40b)
_ASM_EXTABLE(21b,50b)
_ASM_EXTABLE(22b,50b)
-ENDPROC(copy_user_generic_unrolled)
-EXPORT_SYMBOL(copy_user_generic_unrolled)
+ENDPROC(copy_user_mov)
+EXPORT_SYMBOL(copy_user_mov)
/* Some CPUs run faster using the string copy instructions.
* This is also a lot simpler. Use them when possible.
@@ -132,7 +132,7 @@ EXPORT_SYMBOL(copy_user_generic_unrolled)
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ENTRY(copy_user_generic_string)
+ENTRY(copy_user_rep_movsq)
ASM_STAC
cmpl $8,%edx
jb 2f /* less than 8 bytes, go to byte copy loop */
@@ -157,8 +157,8 @@ ENTRY(copy_user_generic_string)
_ASM_EXTABLE(1b,11b)
_ASM_EXTABLE(3b,12b)
-ENDPROC(copy_user_generic_string)
-EXPORT_SYMBOL(copy_user_generic_string)
+ENDPROC(copy_user_rep_movsq)
+EXPORT_SYMBOL(copy_user_rep_movsq)
/*
* Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
@@ -172,7 +172,7 @@ EXPORT_SYMBOL(copy_user_generic_string)
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ENTRY(copy_user_enhanced_fast_string)
+ENTRY(copy_user_rep_movsb)
ASM_STAC
movl %edx,%ecx
1: rep
@@ -187,8 +187,8 @@ ENTRY(copy_user_enhanced_fast_string)
.previous
_ASM_EXTABLE(1b,12b)
-ENDPROC(copy_user_enhanced_fast_string)
-EXPORT_SYMBOL(copy_user_enhanced_fast_string)
+ENDPROC(copy_user_rep_movsb)
+EXPORT_SYMBOL(copy_user_rep_movsb)
/*
* copy_user_nocache - Uncached memory copy with exception handling
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1084,7 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
* routines that comes with labels in the same column
* as the address in objdump, sigh.
*
- * E.g. copy_user_generic_unrolled
+ * E.g. copy_user_mov
*/
if (pos->offset < (s64)size)
browser.offsets[pos->offset] = pos;
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 4/5] x86_64: clobber "cc" in inlined clear_page()
2017-04-26 18:30 ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Alexey Dobriyan
@ 2017-04-26 18:34 ` Alexey Dobriyan
2017-04-26 18:35 ` [PATCH 5/5] x86_64: garbage collect headers in clear_page.S Alexey Dobriyan
2017-05-05 16:58 ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Borislav Petkov
1 sibling, 1 reply; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:34 UTC (permalink / raw)
To: x86, tglx, mingo, hpa; +Cc: linux-kernel, bp
Both REP variants clobber flags because of "xor eax, eax" instructions.
While they can be changed to "mov eax, 0", the generic version probably
cannot, because it has to do a comparison at some point.
And it is kind of not worth it to not clobber flags anyway.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
arch/x86/include/asm/page_64.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -46,7 +46,7 @@ static inline void clear_page(void *page)
clear_page_rep_stosb, X86_FEATURE_ERMS,
"=D" (page),
"0" (page)
- : "memory", "rax", "rcx");
+ : "cc", "memory", "rax", "rcx");
}
void copy_page_mov(void *to, void *from);
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 5/5] x86_64: garbage collect headers in clear_page.S
2017-04-26 18:34 ` [PATCH 4/5] x86_64: clobber "cc" in inlined clear_page() Alexey Dobriyan
@ 2017-04-26 18:35 ` Alexey Dobriyan
0 siblings, 0 replies; 9+ messages in thread
From: Alexey Dobriyan @ 2017-04-26 18:35 UTC (permalink / raw)
To: x86, tglx, mingo, hpa; +Cc: linux-kernel, bp
Not necessary after inlining clear_page().
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
arch/x86/lib/clear_page_64.S | 2 --
1 file changed, 2 deletions(-)
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,6 +1,4 @@
#include <linux/linkage.h>
-#include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
#include <asm/export.h>
/*
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/5] x86_64: inline copy_page() at call site
2017-04-26 18:28 ` [PATCH 2/5] x86_64: inline copy_page() at call site Alexey Dobriyan
2017-04-26 18:30 ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Alexey Dobriyan
@ 2017-04-28 21:04 ` Borislav Petkov
2017-05-02 11:49 ` Alexey Dobriyan
1 sibling, 1 reply; 9+ messages in thread
From: Borislav Petkov @ 2017-04-28 21:04 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: x86, tglx, mingo, hpa, linux-kernel
On Wed, Apr 26, 2017 at 09:28:06PM +0300, Alexey Dobriyan wrote:
> Avoid unconditional branch at every copy_page() call by using
> alternatives and calling optimal variant directly.
>
> Rename individual versions to immediately show which one is used in
> profiles, etc.
>
> RBX and R12 aren't clobbered because generic version restores them
> and both REP versions don't touch them.
>
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
>
> arch/x86/include/asm/page_64.h | 16 ++++++++++++++--
> arch/x86/lib/copy_page_64.S | 17 +++++++----------
> 2 files changed, 21 insertions(+), 12 deletions(-)
>
> --- a/arch/x86/include/asm/page_64.h
> +++ b/arch/x86/include/asm/page_64.h
> @@ -49,8 +49,20 @@ static inline void clear_page(void *page)
> : "memory", "rax", "rcx");
> }
>
> -void copy_page(void *to, void *from);
> -
> +void copy_page_mov(void *to, void *from);
> +void copy_page_rep_movsq(void *to, void *from);
> +void copy_page_rep_movsb(void *to, void *from);
<---- newline here.
> +static __always_inline void copy_page(void *to, void *from)
> +{
> + alternative_call_2(
Please align at the opening brace, like clear_page() above it:
alternative_call_2(copy_page_mov,
copy_page_rep_movsq, X86_FEATURE_REP_GOOD,
...
> + copy_page_rep_movsb, X86_FEATURE_ERMS,
> + ASM_OUTPUT2("=D" (to), "=S" (from)),
> + "0" (to), "1" (from)
> + : "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "cc", "memory"
> + );
> +}
> #endif /* !__ASSEMBLY__ */
>
> #ifdef CONFIG_X86_VSYSCALL_EMULATION
...
> ENTRY(copy_page_rep_movsb)
> mov $4096, %ecx
> rep movsb
> ret
> ENDPROC(copy_page_rep_movsb)
> +EXPORT_SYMBOL(copy_page_rep_movsb)
>
> -ENTRY(copy_page_regs)
> +ENTRY(copy_page_mov)
copy_page_regs() is a better name IMO. copy_page_mov() doesn't tell me
anything - all three use "mov". copy_page_unrolled() sounds ok too.
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/5] x86_64: inline copy_page() at call site
2017-04-28 21:04 ` [PATCH 2/5] x86_64: inline copy_page() at call site Borislav Petkov
@ 2017-05-02 11:49 ` Alexey Dobriyan
2017-05-02 11:59 ` Borislav Petkov
0 siblings, 1 reply; 9+ messages in thread
From: Alexey Dobriyan @ 2017-05-02 11:49 UTC (permalink / raw)
To: Borislav Petkov
Cc: the arch/x86 maintainers, Thomas Gleixner, Ingo Molnar,
H. Peter Anvin, Linux Kernel
On Sat, Apr 29, 2017 at 12:04 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Wed, Apr 26, 2017 at 09:28:06PM +0300, Alexey Dobriyan wrote:
>> +static __always_inline void copy_page(void *to, void *from)
>> +{
>> + alternative_call_2(
>
> Please align at the opening brace, like clear_page() above it:
Then I'd have to split clobber list and no lines will be saved.
> alternative_call_2(copy_page_mov,
> copy_page_rep_movsq, X86_FEATURE_REP_GOOD,
> ...
>
>
>> + copy_page_rep_movsb, X86_FEATURE_ERMS,
>> + ASM_OUTPUT2("=D" (to), "=S" (from)),
>> + "0" (to), "1" (from)
>> + : "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "cc", "memory"
>> + );
>> +}
>> #endif /* !__ASSEMBLY__ */
>>
>> #ifdef CONFIG_X86_VSYSCALL_EMULATION
>
> ...
>
>> ENTRY(copy_page_rep_movsb)
>> mov $4096, %ecx
>> rep movsb
>> ret
>> ENDPROC(copy_page_rep_movsb)
>> +EXPORT_SYMBOL(copy_page_rep_movsb)
>>
>> -ENTRY(copy_page_regs)
>> +ENTRY(copy_page_mov)
>
> copy_page_regs() is a better name IMO. copy_page_mov() doesn't tell me
> anything - all three use "mov". copy_page_unrolled() sounds ok too.
It says unambiguously which instruction does the actual copying.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/5] x86_64: inline copy_page() at call site
2017-05-02 11:49 ` Alexey Dobriyan
@ 2017-05-02 11:59 ` Borislav Petkov
0 siblings, 0 replies; 9+ messages in thread
From: Borislav Petkov @ 2017-05-02 11:59 UTC (permalink / raw)
To: Alexey Dobriyan
Cc: the arch/x86 maintainers, Thomas Gleixner, Ingo Molnar,
H. Peter Anvin, Linux Kernel
On Tue, May 02, 2017 at 02:49:04PM +0300, Alexey Dobriyan wrote:
> It says unambiguously which instruction does the actual copying.
And that doesn't tell me that it is an unrolled move using registers.
And that is much more useful info when I look at stack traces than just
"mov".
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply.
--
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants
2017-04-26 18:30 ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Alexey Dobriyan
2017-04-26 18:34 ` [PATCH 4/5] x86_64: clobber "cc" in inlined clear_page() Alexey Dobriyan
@ 2017-05-05 16:58 ` Borislav Petkov
1 sibling, 0 replies; 9+ messages in thread
From: Borislav Petkov @ 2017-05-05 16:58 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: x86, tglx, mingo, hpa, linux-kernel
On Wed, Apr 26, 2017 at 09:30:47PM +0300, Alexey Dobriyan wrote:
> Patch changes market-ish acronyms like ERMS and chatty names
> to consistent and shorter versions:
>
> xxx_mov
> xxx_rep_stosq xxx_rep_movsq
> xxx_rep_stosb xxx_rep_movsb
>
> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
> ---
>
> arch/x86/include/asm/page_64.h | 12 ++++++------
> arch/x86/include/asm/uaccess_64.h | 18 +++++++++---------
> arch/x86/lib/clear_page_64.S | 18 +++++++++---------
> arch/x86/lib/copy_user_64.S | 20 ++++++++++----------
> tools/perf/ui/browsers/annotate.c | 2 +-
> 5 files changed, 35 insertions(+), 35 deletions(-)
>
> --- a/arch/x86/include/asm/page_64.h
> +++ b/arch/x86/include/asm/page_64.h
> @@ -35,15 +35,15 @@ extern unsigned long __phys_addr_symbol(unsigned long);
> #define pfn_valid(pfn) ((pfn) < max_pfn)
> #endif
>
> -void clear_page_orig(void *page);
> -void clear_page_rep(void *page);
> -void clear_page_erms(void *page);
> +void clear_page_mov(void *page);
> +void clear_page_rep_stosq(void *page);
> +void clear_page_rep_stosb(void *page);
>
> static inline void clear_page(void *page)
> {
> - alternative_call_2(clear_page_orig,
> - clear_page_rep, X86_FEATURE_REP_GOOD,
> - clear_page_erms, X86_FEATURE_ERMS,
> + alternative_call_2(clear_page_mov,
> + clear_page_rep_stosq, X86_FEATURE_REP_GOOD,
> + clear_page_rep_stosb, X86_FEATURE_ERMS,
> "=D" (page),
> "0" (page)
> : "memory", "rax", "rcx");
> --- a/arch/x86/include/asm/uaccess_64.h
> +++ b/arch/x86/include/asm/uaccess_64.h
> @@ -18,11 +18,11 @@
>
> /* Handles exceptions in both to and from, but doesn't do access_ok */
> __must_check unsigned long
> -copy_user_enhanced_fast_string(void *to, const void *from, unsigned len);
> +copy_user_rep_movsb(void *to, const void *from, unsigned len);
WARNING: Prefer 'unsigned int' to bare use of 'unsigned'
#62: FILE: arch/x86/include/asm/uaccess_64.h:21:
+copy_user_rep_movsb(void *to, const void *from, unsigned len);
Pls convert them while at it.
> __must_check unsigned long
> -copy_user_generic_string(void *to, const void *from, unsigned len);
> +copy_user_rep_movsq(void *to, const void *from, unsigned len);
> __must_check unsigned long
> -copy_user_generic_unrolled(void *to, const void *from, unsigned len);
> +copy_user_mov(void *to, const void *from, unsigned len);
>
> static __always_inline __must_check unsigned long
> copy_user_generic(void *to, const void *from, unsigned len)
> @@ -30,14 +30,14 @@ copy_user_generic(void *to, const void *from, unsigned len)
> unsigned ret;
>
> /*
> - * If CPU has ERMS feature, use copy_user_enhanced_fast_string.
> - * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
> - * Otherwise, use copy_user_generic_unrolled.
> + * If CPU has ERMS feature, use copy_user_rep_movsb.
> + * Otherwise, if CPU has rep_good feature, use copy_user_rep_movsq.
REP_GOOD, while you're at it. Also, end function names with ().
> + * Otherwise, use copy_user_mov.
> */
> - alternative_call_2(copy_user_generic_unrolled,
> - copy_user_generic_string,
> + alternative_call_2(copy_user_mov,
> + copy_user_rep_movsq,
> X86_FEATURE_REP_GOOD,
> - copy_user_enhanced_fast_string,
> + copy_user_rep_movsb,
> X86_FEATURE_ERMS,
> ASM_OUTPUT2("=a" (ret), "=D" (to), "=S" (from),
> "=d" (len)),
> --- a/arch/x86/lib/clear_page_64.S
> +++ b/arch/x86/lib/clear_page_64.S
> @@ -14,15 +14,15 @@
> * Zero a page.
> * %rdi - page
> */
> -ENTRY(clear_page_rep)
> +ENTRY(clear_page_rep_stosq)
> movl $4096/8,%ecx
> xorl %eax,%eax
> rep stosq
> ret
> -ENDPROC(clear_page_rep)
> -EXPORT_SYMBOL_GPL(clear_page_rep)
> +ENDPROC(clear_page_rep_stosq)
> +EXPORT_SYMBOL_GPL(clear_page_rep_stosq)
>
> -ENTRY(clear_page_orig)
> +ENTRY(clear_page_mov)
> xorl %eax,%eax
> movl $4096/64,%ecx
> .p2align 4
> @@ -41,13 +41,13 @@ ENTRY(clear_page_orig)
> jnz .Lloop
> nop
> ret
> -ENDPROC(clear_page_orig)
> -EXPORT_SYMBOL_GPL(clear_page_orig)
> +ENDPROC(clear_page_mov)
> +EXPORT_SYMBOL_GPL(clear_page_mov)
Same issue as with the previous patch: _orig was dumb but since you're
changing the names, pls change them to something more descriptive.
> -ENTRY(clear_page_erms)
> +ENTRY(clear_page_rep_stosb)
> movl $4096,%ecx
> xorl %eax,%eax
> rep stosb
> ret
> -ENDPROC(clear_page_erms)
> -EXPORT_SYMBOL_GPL(clear_page_erms)
> +ENDPROC(clear_page_rep_stosb)
> +EXPORT_SYMBOL_GPL(clear_page_rep_stosb)
> --- a/arch/x86/lib/copy_user_64.S
> +++ b/arch/x86/lib/copy_user_64.S
> @@ -17,7 +17,7 @@
> #include <asm/export.h>
>
> /*
> - * copy_user_generic_unrolled - memory copy with exception handling.
> + * copy_user_mov - memory copy with exception handling.
This rename is actually losing information from the function name:
"generic_unrolled" explains exactly what the function does.
> * This version is for CPUs like P4 that don't have efficient micro
> * code for rep movsq
> *
--
Regards/Gruss,
Boris.
Good mailing practices for 400: avoid top-posting and trim the reply.
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2017-05-05 16:58 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-26 18:23 [PATCH 1/5] x86_64: use REP MOVSB in copy_page() Alexey Dobriyan
2017-04-26 18:28 ` [PATCH 2/5] x86_64: inline copy_page() at call site Alexey Dobriyan
2017-04-26 18:30 ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Alexey Dobriyan
2017-04-26 18:34 ` [PATCH 4/5] x86_64: clobber "cc" in inlined clear_page() Alexey Dobriyan
2017-04-26 18:35 ` [PATCH 5/5] x86_64: garbage collect headers in clear_page.S Alexey Dobriyan
2017-05-05 16:58 ` [PATCH 3/5] x86_64: rename clear_page() and copy_user() variants Borislav Petkov
2017-04-28 21:04 ` [PATCH 2/5] x86_64: inline copy_page() at call site Borislav Petkov
2017-05-02 11:49 ` Alexey Dobriyan
2017-05-02 11:59 ` Borislav Petkov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).