* [PATCH RFC 1/2] [x86] Modify comments and clean up code.
@ 2012-10-11 12:28 ling.ma
2012-10-14 10:49 ` Borislav Petkov
0 siblings, 1 reply; 6+ messages in thread
From: ling.ma @ 2012-10-11 12:28 UTC (permalink / raw)
To: mingo; +Cc: hpa, tglx, linux-kernel, Ma Ling
From: Ma Ling <ling.ma@intel.com>
Modern CPUs use fast-string instructions to accelerate copy performance
by combining data into 128-bit chunks, so we modify the comments and code style.
Signed-off-by: Ma Ling <ling.ma@intel.com>
---
arch/x86/lib/copy_page_64.S | 119 +++++++++++++++++++++----------------------
1 files changed, 59 insertions(+), 60 deletions(-)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 6b34d04..3da5527 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -5,91 +5,90 @@
#include <asm/alternative-asm.h>
ALIGN
-copy_page_c:
+copy_page_rep:
CFI_STARTPROC
- movl $4096/8,%ecx
- rep movsq
+ movl $4096/8, %ecx
+ rep movsq
ret
CFI_ENDPROC
-ENDPROC(copy_page_c)
+ENDPROC(copy_page_rep)
-/* Don't use streaming store because it's better when the target
- ends up in cache. */
-
-/* Could vary the prefetch distance based on SMP/UP */
+/*
+ Don't use streaming copy unless cpu indicate X86_FEATURE_REP_GOOD
+ Could vary the prefetch distance based on SMP/UP
+*/
ENTRY(copy_page)
CFI_STARTPROC
- subq $2*8,%rsp
+ subq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET 2*8
- movq %rbx,(%rsp)
+ movq %rbx, (%rsp)
CFI_REL_OFFSET rbx, 0
- movq %r12,1*8(%rsp)
+ movq %r12, 1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
- movl $(4096/64)-5,%ecx
+ movl $(4096/64)-5, %ecx
.p2align 4
.Loop64:
- dec %rcx
+ dec %rcx
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
+ movq 0x8*0(%rsi), %rax
+ movq 0x8*1(%rsi), %rbx
+ movq 0x8*2(%rsi), %rdx
+ movq 0x8*3(%rsi), %r8
+ movq 0x8*4(%rsi), %r9
+ movq 0x8*5(%rsi), %r10
+ movq 0x8*6(%rsi), %r11
+ movq 0x8*7(%rsi), %r12
prefetcht0 5*64(%rsi)
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
+ movq %rax, 0x8*0(%rdi)
+ movq %rbx, 0x8*1(%rdi)
+ movq %rdx, 0x8*2(%rdi)
+ movq %r8, 0x8*3(%rdi)
+ movq %r9, 0x8*4(%rdi)
+ movq %r10, 0x8*5(%rdi)
+ movq %r11, 0x8*6(%rdi)
+ movq %r12, 0x8*7(%rdi)
- leaq 64 (%rsi), %rsi
- leaq 64 (%rdi), %rdi
+ leaq 64 (%rsi), %rsi
+ leaq 64 (%rdi), %rdi
- jnz .Loop64
+ jnz .Loop64
- movl $5,%ecx
+ movl $5, %ecx
.p2align 4
.Loop2:
- decl %ecx
-
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
-
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
-
- leaq 64(%rdi),%rdi
- leaq 64(%rsi),%rsi
-
+ decl %ecx
+
+ movq 0x8*0(%rsi), %rax
+ movq 0x8*1(%rsi), %rbx
+ movq 0x8*2(%rsi), %rdx
+ movq 0x8*3(%rsi), %r8
+ movq 0x8*4(%rsi), %r9
+ movq 0x8*5(%rsi), %r10
+ movq 0x8*6(%rsi), %r11
+ movq 0x8*7(%rsi), %r12
+
+ movq %rax, 0x8*0(%rdi)
+ movq %rbx, 0x8*1(%rdi)
+ movq %rdx, 0x8*2(%rdi)
+ movq %r8, 0x8*3(%rdi)
+ movq %r9, 0x8*4(%rdi)
+ movq %r10, 0x8*5(%rdi)
+ movq %r11, 0x8*6(%rdi)
+ movq %r12, 0x8*7(%rdi)
+
+ leaq 64(%rdi), %rdi
+ leaq 64(%rsi), %rsi
jnz .Loop2
- movq (%rsp),%rbx
+ movq (%rsp), %rbx
CFI_RESTORE rbx
- movq 1*8(%rsp),%r12
+ movq 1*8(%rsp), %r12
CFI_RESTORE r12
- addq $2*8,%rsp
+ addq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET -2*8
ret
.Lcopy_page_end:
@@ -103,7 +102,7 @@ ENDPROC(copy_page)
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
- .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
+ .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
--
1.6.5.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH RFC 1/2] [x86] Modify comments and clean up code.
2012-10-11 12:28 [PATCH RFC 1/2] [x86] Modify comments and clean up code ling.ma
@ 2012-10-14 10:49 ` Borislav Petkov
0 siblings, 0 replies; 6+ messages in thread
From: Borislav Petkov @ 2012-10-14 10:49 UTC (permalink / raw)
To: ling.ma; +Cc: mingo, hpa, tglx, linux-kernel
On Thu, Oct 11, 2012 at 08:28:44PM +0800, ling.ma@intel.com wrote:
> From: Ma Ling <ling.ma@intel.com>
>
> Modern CPU use fast-string instruction to accelerate copy performance,
> by combining data into 128bit, so we modify comments and code style.
>
> Signed-off-by: Ma Ling <ling.ma@intel.com>
>
> ---
> arch/x86/lib/copy_page_64.S | 119 +++++++++++++++++++++----------------------
> 1 files changed, 59 insertions(+), 60 deletions(-)
>
> diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
> index 6b34d04..3da5527 100644
> --- a/arch/x86/lib/copy_page_64.S
> +++ b/arch/x86/lib/copy_page_64.S
> @@ -5,91 +5,90 @@
> #include <asm/alternative-asm.h>
>
> ALIGN
> -copy_page_c:
> +copy_page_rep:
> CFI_STARTPROC
> - movl $4096/8,%ecx
> - rep movsq
> + movl $4096/8, %ecx
> + rep movsq
> ret
> CFI_ENDPROC
> -ENDPROC(copy_page_c)
> +ENDPROC(copy_page_rep)
>
> -/* Don't use streaming store because it's better when the target
> - ends up in cache. */
> -
> -/* Could vary the prefetch distance based on SMP/UP */
> +/*
> + Don't use streaming copy unless cpu indicate X86_FEATURE_REP_GOOD
> + Could vary the prefetch distance based on SMP/UP
> +*/
Kernel comment style formatting is:
/*
* <something not trivial to see worth adding comment for>
* <optional second line of that statement>
*/
>
> ENTRY(copy_page)
> CFI_STARTPROC
> - subq $2*8,%rsp
> + subq $2*8, %rsp
> CFI_ADJUST_CFA_OFFSET 2*8
> - movq %rbx,(%rsp)
> + movq %rbx, (%rsp)
> CFI_REL_OFFSET rbx, 0
> - movq %r12,1*8(%rsp)
> + movq %r12, 1*8(%rsp)
> CFI_REL_OFFSET r12, 1*8
>
> - movl $(4096/64)-5,%ecx
> + movl $(4096/64)-5, %ecx
> .p2align 4
> .Loop64:
> - dec %rcx
> + dec %rcx
Applying: Modify comments and clean up code.
/home/boris/kernel/linux-2.6/.git/rebase-apply/patch:51: space before tab in indent.
dec %rcx
warning: 1 line adds whitespace errors.
>
> - movq (%rsi), %rax
> - movq 8 (%rsi), %rbx
> - movq 16 (%rsi), %rdx
> - movq 24 (%rsi), %r8
> - movq 32 (%rsi), %r9
> - movq 40 (%rsi), %r10
> - movq 48 (%rsi), %r11
> - movq 56 (%rsi), %r12
> + movq 0x8*0(%rsi), %rax
> + movq 0x8*1(%rsi), %rbx
> + movq 0x8*2(%rsi), %rdx
> + movq 0x8*3(%rsi), %r8
> + movq 0x8*4(%rsi), %r9
> + movq 0x8*5(%rsi), %r10
> + movq 0x8*6(%rsi), %r11
> + movq 0x8*7(%rsi), %r12
[ … ]
--
Regards/Gruss,
Boris.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH RFC 1/2 ] [x86] Modify comments and clean up code.
2011-07-01 20:15 ` Andi Kleen
@ 2011-07-01 20:21 ` Ingo Molnar
0 siblings, 0 replies; 6+ messages in thread
From: Ingo Molnar @ 2011-07-01 20:21 UTC (permalink / raw)
To: Andi Kleen; +Cc: ling.ma, hpa, tglx, linux-kernel
* Andi Kleen <andi@firstfloor.org> wrote:
> ling.ma@intel.com writes:
>
> > From: Ma Ling <ling.ma@intel.com>
> >
> > Modern CPU use fast-string instruction to accelerate copy performance,
> > by combining data into 128bit, so we modify comments and code style.
>
> The description has nothing to do with the patch.
>
> As far as I can tell the patch just renames one symbol
> and changes all white space. Not fully sure what the point of it
> is.
>
> But normally white space change patches are not supposed to have
> any other changes because the other changes embedded in them are
> impossible to review.
Non-functional cleanups can go into the same patch as whitespace
cleanups, as long as the generated object code does not change.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH RFC 1/2 ] [x86] Modify comments and clean up code.
2011-06-28 22:36 [PATCH RFC 1/2 ] " ling.ma
2011-07-01 12:40 ` Ingo Molnar
@ 2011-07-01 20:15 ` Andi Kleen
2011-07-01 20:21 ` Ingo Molnar
1 sibling, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2011-07-01 20:15 UTC (permalink / raw)
To: ling.ma; +Cc: mingo, hpa, tglx, linux-kernel
ling.ma@intel.com writes:
> From: Ma Ling <ling.ma@intel.com>
>
> Modern CPU use fast-string instruction to accelerate copy performance,
> by combining data into 128bit, so we modify comments and code style.
The description has nothing to do with the patch.
As far as I can tell the patch just renames one symbol
and changes all white space. Not fully sure what the point of it
is.
But normally white space change patches are not supposed to have
any other changes because the other changes embedded in them
are impossible to review.
-Andi
--
ak@linux.intel.com -- Speaking for myself only
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH RFC 1/2 ] [x86] Modify comments and clean up code.
2011-06-28 22:36 [PATCH RFC 1/2 ] " ling.ma
@ 2011-07-01 12:40 ` Ingo Molnar
2011-07-01 20:15 ` Andi Kleen
1 sibling, 0 replies; 6+ messages in thread
From: Ingo Molnar @ 2011-07-01 12:40 UTC (permalink / raw)
To: ling.ma; +Cc: hpa, tglx, linux-kernel
* ling.ma@intel.com <ling.ma@intel.com> wrote:
> From: Ma Ling <ling.ma@intel.com>
>
> Modern CPU use fast-string instruction to accelerate copy performance,
> by combining data into 128bit, so we modify comments and code style.
>
> Thanks
> Ling
>
> ---
> arch/x86/lib/copy_page_64.S | 124 +++++++++++++++++++++---------------------
> 1 files changed, 62 insertions(+), 62 deletions(-)
Looks good in principle but there's no SOB so i cannot apply it.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH RFC 1/2 ] [x86] Modify comments and clean up code.
@ 2011-06-28 22:36 ling.ma
2011-07-01 12:40 ` Ingo Molnar
2011-07-01 20:15 ` Andi Kleen
0 siblings, 2 replies; 6+ messages in thread
From: ling.ma @ 2011-06-28 22:36 UTC (permalink / raw)
To: mingo; +Cc: hpa, tglx, linux-kernel, ling.ma
From: Ma Ling <ling.ma@intel.com>
Modern CPUs use fast-string instructions to accelerate copy performance
by combining data into 128-bit chunks, so we modify the comments and code style.
Thanks
Ling
---
arch/x86/lib/copy_page_64.S | 124 +++++++++++++++++++++---------------------
1 files changed, 62 insertions(+), 62 deletions(-)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 6fec2d1..45f7db7 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,97 +2,98 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
ALIGN
-copy_page_c:
+copy_page_rep:
CFI_STARTPROC
- movl $4096/8,%ecx
- rep movsq
+ movl $4096/8, %ecx
+ rep movsq
ret
CFI_ENDPROC
-ENDPROC(copy_page_c)
+ENDPROC(copy_page_rep)
-/* Don't use streaming store because it's better when the target
- ends up in cache. */
-
-/* Could vary the prefetch distance based on SMP/UP */
+/*
+ Don't use streaming copy unless cpu indicate X86_FEATURE_REP_GOOD
+ Could vary the prefetch distance based on SMP/UP
+*/
ENTRY(copy_page)
CFI_STARTPROC
- subq $3*8,%rsp
+ subq $3*8, %rsp
CFI_ADJUST_CFA_OFFSET 3*8
- movq %rbx,(%rsp)
+ movq %rbx, (%rsp)
CFI_REL_OFFSET rbx, 0
- movq %r12,1*8(%rsp)
+ movq %r12, 1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
- movq %r13,2*8(%rsp)
+ movq %r13, 2*8(%rsp)
CFI_REL_OFFSET r13, 2*8
- movl $(4096/64)-5,%ecx
+ movl $(4096/64)-5, %ecx
.p2align 4
.Loop64:
- dec %rcx
+ dec %rcx
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
+ movq 0x8*0(%rsi), %rax
+ movq 0x8*1(%rsi), %rbx
+ movq 0x8*2(%rsi), %rdx
+ movq 0x8*3(%rsi), %r8
+ movq 0x8*4(%rsi), %r9
+ movq 0x8*5(%rsi), %r10
+ movq 0x8*6(%rsi), %r11
+ movq 0x8*7(%rsi), %r12
prefetcht0 5*64(%rsi)
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
+ movq %rax, 0x8*0(%rdi)
+ movq %rbx, 0x8*1(%rdi)
+ movq %rdx, 0x8*2(%rdi)
+ movq %r8, 0x8*3(%rdi)
+ movq %r9, 0x8*4(%rdi)
+ movq %r10, 0x8*5(%rdi)
+ movq %r11, 0x8*6(%rdi)
+ movq %r12, 0x8*7(%rdi)
- leaq 64 (%rsi), %rsi
- leaq 64 (%rdi), %rdi
+ leaq 64 (%rsi), %rsi
+ leaq 64 (%rdi), %rdi
- jnz .Loop64
+ jnz .Loop64
- movl $5,%ecx
+ movl $5, %ecx
.p2align 4
.Loop2:
- decl %ecx
-
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
-
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
-
- leaq 64(%rdi),%rdi
- leaq 64(%rsi),%rsi
+ decl %ecx
+
+ movq 0x8*0(%rsi), %rax
+ movq 0x8*1(%rsi), %rbx
+ movq 0x8*2(%rsi), %rdx
+ movq 0x8*3(%rsi), %r8
+ movq 0x8*4(%rsi), %r9
+ movq 0x8*5(%rsi), %r10
+ movq 0x8*6(%rsi), %r11
+ movq 0x8*7(%rsi), %r12
+
+ movq %rax, 0x8*0(%rdi)
+ movq %rbx, 0x8*1(%rdi)
+ movq %rdx, 0x8*2(%rdi)
+ movq %r8, 0x8*3(%rdi)
+ movq %r9, 0x8*4(%rdi)
+ movq %r10, 0x8*5(%rdi)
+ movq %r11, 0x8*6(%rdi)
+ movq %r12, 0x8*7(%rdi)
+
+ leaq 64(%rdi), %rdi
+ leaq 64(%rsi), %rsi
jnz .Loop2
- movq (%rsp),%rbx
+ movq (%rsp), %rbx
CFI_RESTORE rbx
- movq 1*8(%rsp),%r12
+ movq 1*8(%rsp), %r12
CFI_RESTORE r12
- movq 2*8(%rsp),%r13
+ movq 2*8(%rsp), %r13
CFI_RESTORE r13
- addq $3*8,%rsp
+ addq $3*8, %rsp
CFI_ADJUST_CFA_OFFSET -3*8
ret
.Lcopy_page_end:
@@ -102,11 +103,10 @@ ENDPROC(copy_page)
/* Some CPUs run faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
-#include <asm/cpufeature.h>
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
- .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
+ .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
--
1.6.5.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2012-10-14 10:49 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-11 12:28 [PATCH RFC 1/2] [x86] Modify comments and clean up code ling.ma
2012-10-14 10:49 ` Borislav Petkov
-- strict thread matches above, loose matches on Subject: below --
2011-06-28 22:36 [PATCH RFC 1/2 ] " ling.ma
2011-07-01 12:40 ` Ingo Molnar
2011-07-01 20:15 ` Andi Kleen
2011-07-01 20:21 ` Ingo Molnar
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for the NNTP newsgroup(s).