linux-kernel.vger.kernel.org archive mirror
* [PATCH RFC 1/2] [x86] Modify comments and clean up code.
@ 2012-10-11 12:28 ling.ma
  2012-10-14 10:49 ` Borislav Petkov
  0 siblings, 1 reply; 6+ messages in thread
From: ling.ma @ 2012-10-11 12:28 UTC (permalink / raw)
  To: mingo; +Cc: hpa, tglx, linux-kernel, Ma Ling

From: Ma Ling <ling.ma@intel.com>

Modern CPUs use fast-string instructions to accelerate copy performance
by combining data into 128-bit operations, so we modify the comments and
code style.

Signed-off-by: Ma Ling <ling.ma@intel.com>

---
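For reference, a minimal C sketch (illustrative only, not part of the
patch) of what the copy_page_rep fast-string path amounts to; the helper
name and the PAGE_SIZE macro are placeholders for this note, not kernel
code:

	#include <stddef.h>

	#define PAGE_SIZE 4096

	/* One "rep movsq" moves PAGE_SIZE/8 = 512 quadwords from src to dst. */
	static void copy_page_rep_sketch(void *dst, const void *src)
	{
		size_t qwords = PAGE_SIZE / 8;

		/* rep movsq copies %rcx quadwords from (%rsi) to (%rdi) */
		asm volatile("rep movsq"
			     : "+D" (dst), "+S" (src), "+c" (qwords)
			     :
			     : "memory");
	}

On CPUs that advertise X86_FEATURE_REP_GOOD this microcoded string move
should be at least as fast as the unrolled loop, which is what the
alternatives entry at the end of the file selects.
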
 arch/x86/lib/copy_page_64.S |  119 +++++++++++++++++++++----------------------
 1 files changed, 59 insertions(+), 60 deletions(-)

diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 6b34d04..3da5527 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -5,91 +5,90 @@
 #include <asm/alternative-asm.h>
 
 	ALIGN
-copy_page_c:
+copy_page_rep:
 	CFI_STARTPROC
-	movl $4096/8,%ecx
-	rep movsq
+	movl	$4096/8, %ecx
+	rep	movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_c)
+ENDPROC(copy_page_rep)
 
-/* Don't use streaming store because it's better when the target
-   ends up in cache. */
-	    
-/* Could vary the prefetch distance based on SMP/UP */
+/*
+   Don't use streaming copy unless cpu indicate X86_FEATURE_REP_GOOD
+   Could vary the prefetch distance based on SMP/UP
+*/
 
 ENTRY(copy_page)
 	CFI_STARTPROC
-	subq	$2*8,%rsp
+	subq	$2*8,	%rsp
 	CFI_ADJUST_CFA_OFFSET 2*8
-	movq	%rbx,(%rsp)
+	movq	%rbx,	(%rsp)
 	CFI_REL_OFFSET rbx, 0
-	movq	%r12,1*8(%rsp)
+	movq	%r12,	1*8(%rsp)
 	CFI_REL_OFFSET r12, 1*8
 
-	movl	$(4096/64)-5,%ecx
+	movl	$(4096/64)-5,	%ecx
 	.p2align 4
 .Loop64:
-  	dec     %rcx
+  	dec	%rcx
 
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
 
 	prefetcht0 5*64(%rsi)
 
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
 
-	leaq    64 (%rsi), %rsi
-	leaq    64 (%rdi), %rdi
+	leaq	64 (%rsi), %rsi
+	leaq	64 (%rdi), %rdi
 
-	jnz     .Loop64
+	jnz	.Loop64
 
-	movl	$5,%ecx
+	movl	$5, %ecx
 	.p2align 4
 .Loop2:
-	decl   %ecx
-
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
-
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
-
-	leaq	64(%rdi),%rdi
-	leaq	64(%rsi),%rsi
-
+	decl	%ecx
+
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
+
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
+
+	leaq	64(%rdi), %rdi
+	leaq	64(%rsi), %rsi
 	jnz	.Loop2
 
-	movq	(%rsp),%rbx
+	movq	(%rsp), %rbx
 	CFI_RESTORE rbx
-	movq	1*8(%rsp),%r12
+	movq	1*8(%rsp), %r12
 	CFI_RESTORE r12
-	addq	$2*8,%rsp
+	addq	$2*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -2*8
 	ret
 .Lcopy_page_end:
@@ -103,7 +102,7 @@ ENDPROC(copy_page)
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
+	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
 2:
 	.previous
 	.section .altinstructions,"a"
-- 
1.6.5.2


* Re: [PATCH RFC 1/2] [x86] Modify comments and clean up code.
  2012-10-11 12:28 [PATCH RFC 1/2] [x86] Modify comments and clean up code ling.ma
@ 2012-10-14 10:49 ` Borislav Petkov
  0 siblings, 0 replies; 6+ messages in thread
From: Borislav Petkov @ 2012-10-14 10:49 UTC (permalink / raw)
  To: ling.ma; +Cc: mingo, hpa, tglx, linux-kernel

On Thu, Oct 11, 2012 at 08:28:44PM +0800, ling.ma@intel.com wrote:
> From: Ma Ling <ling.ma@intel.com>
> 
> Modern CPUs use fast-string instructions to accelerate copy performance
> by combining data into 128-bit operations, so we modify the comments and
> code style.
> 
> Signed-off-by: Ma Ling <ling.ma@intel.com>
> 
> ---
>  arch/x86/lib/copy_page_64.S |  119 +++++++++++++++++++++----------------------
>  1 files changed, 59 insertions(+), 60 deletions(-)
> 
> diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
> index 6b34d04..3da5527 100644
> --- a/arch/x86/lib/copy_page_64.S
> +++ b/arch/x86/lib/copy_page_64.S
> @@ -5,91 +5,90 @@
>  #include <asm/alternative-asm.h>
>  
>  	ALIGN
> -copy_page_c:
> +copy_page_rep:
>  	CFI_STARTPROC
> -	movl $4096/8,%ecx
> -	rep movsq
> +	movl	$4096/8, %ecx
> +	rep	movsq
>  	ret
>  	CFI_ENDPROC
> -ENDPROC(copy_page_c)
> +ENDPROC(copy_page_rep)
>  
> -/* Don't use streaming store because it's better when the target
> -   ends up in cache. */
> -	    
> -/* Could vary the prefetch distance based on SMP/UP */
> +/*
> +   Don't use streaming copy unless cpu indicate X86_FEATURE_REP_GOOD
> +   Could vary the prefetch distance based on SMP/UP
> +*/

Kernel comment style formatting is:

 /*
  * <something not trivial to see worth adding comment for>
  * <optional second line of that statement>
  */

>  
>  ENTRY(copy_page)
>  	CFI_STARTPROC
> -	subq	$2*8,%rsp
> +	subq	$2*8,	%rsp
>  	CFI_ADJUST_CFA_OFFSET 2*8
> -	movq	%rbx,(%rsp)
> +	movq	%rbx,	(%rsp)
>  	CFI_REL_OFFSET rbx, 0
> -	movq	%r12,1*8(%rsp)
> +	movq	%r12,	1*8(%rsp)
>  	CFI_REL_OFFSET r12, 1*8
>  
> -	movl	$(4096/64)-5,%ecx
> +	movl	$(4096/64)-5,	%ecx
>  	.p2align 4
>  .Loop64:
> -  	dec     %rcx
> +  	dec	%rcx

Applying: Modify comments and clean up code.
/home/boris/kernel/linux-2.6/.git/rebase-apply/patch:51: space before tab in indent.
        dec     %rcx
warning: 1 line adds whitespace errors.

>  
> -	movq        (%rsi), %rax
> -	movq      8 (%rsi), %rbx
> -	movq     16 (%rsi), %rdx
> -	movq     24 (%rsi), %r8
> -	movq     32 (%rsi), %r9
> -	movq     40 (%rsi), %r10
> -	movq     48 (%rsi), %r11
> -	movq     56 (%rsi), %r12
> +	movq	0x8*0(%rsi), %rax
> +	movq	0x8*1(%rsi), %rbx
> +	movq	0x8*2(%rsi), %rdx
> +	movq	0x8*3(%rsi), %r8
> +	movq	0x8*4(%rsi), %r9
> +	movq	0x8*5(%rsi), %r10
> +	movq	0x8*6(%rsi), %r11
> +	movq	0x8*7(%rsi), %r12

[ … ]

-- 
Regards/Gruss,
    Boris.

* Re: [PATCH RFC 1/2 ] [x86] Modify comments and clean up code.
  2011-07-01 20:15 ` Andi Kleen
@ 2011-07-01 20:21   ` Ingo Molnar
  0 siblings, 0 replies; 6+ messages in thread
From: Ingo Molnar @ 2011-07-01 20:21 UTC (permalink / raw)
  To: Andi Kleen; +Cc: ling.ma, hpa, tglx, linux-kernel


* Andi Kleen <andi@firstfloor.org> wrote:

> ling.ma@intel.com writes:
> 
> > From: Ma Ling <ling.ma@intel.com>
> >
> > > Modern CPUs use fast-string instructions to accelerate copy performance
> > > by combining data into 128-bit operations, so we modify the comments and
> > > code style.
> 
> The description has nothing to do with the patch.
> 
> As far as I can tell the patch just renames one symbol
> and changes all white space. Not fully sure what the point of it 
> is. 
> 
> But normally white space change patches are not supposed to have 
> any other changes because the other changes embedded in them are 
> impossible to review.

Non-functional cleanups can go into the same patch as whitespace 
cleanups, as long as the generated object code does not change.

Thanks,

	Ingo

* Re: [PATCH RFC 1/2 ] [x86] Modify comments and clean up code.
  2011-06-28 22:36 [PATCH RFC 1/2 ] " ling.ma
  2011-07-01 12:40 ` Ingo Molnar
@ 2011-07-01 20:15 ` Andi Kleen
  2011-07-01 20:21   ` Ingo Molnar
  1 sibling, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2011-07-01 20:15 UTC (permalink / raw)
  To: ling.ma; +Cc: mingo, hpa, tglx, linux-kernel

ling.ma@intel.com writes:

> From: Ma Ling <ling.ma@intel.com>
>
> Modern CPUs use fast-string instructions to accelerate copy performance
> by combining data into 128-bit operations, so we modify the comments and
> code style.

The description has nothing to do with the patch.

As far as I can tell the patch just renames one symbol
and changes all white space. Not fully sure what the point of it 
is. 

But normally white space change patches are not supposed to have
any other changes because the other changes embedded in them
are impossible to review.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only

* Re: [PATCH RFC 1/2 ] [x86] Modify comments and clean up code.
  2011-06-28 22:36 [PATCH RFC 1/2 ] " ling.ma
@ 2011-07-01 12:40 ` Ingo Molnar
  2011-07-01 20:15 ` Andi Kleen
  1 sibling, 0 replies; 6+ messages in thread
From: Ingo Molnar @ 2011-07-01 12:40 UTC (permalink / raw)
  To: ling.ma; +Cc: hpa, tglx, linux-kernel


* ling.ma@intel.com <ling.ma@intel.com> wrote:

> From: Ma Ling <ling.ma@intel.com>
> 
> Modern CPUs use fast-string instructions to accelerate copy performance
> by combining data into 128-bit operations, so we modify the comments and
> code style.
> 
> Thanks
> Ling
> 
> ---
>  arch/x86/lib/copy_page_64.S |  124 +++++++++++++++++++++---------------------
>  1 files changed, 62 insertions(+), 62 deletions(-)

Looks good in principle but there's no SOB so i cannot apply it.

Thanks,

	Ingo

* [PATCH RFC 1/2 ] [x86] Modify comments and clean up code.
@ 2011-06-28 22:36 ling.ma
  2011-07-01 12:40 ` Ingo Molnar
  2011-07-01 20:15 ` Andi Kleen
  0 siblings, 2 replies; 6+ messages in thread
From: ling.ma @ 2011-06-28 22:36 UTC (permalink / raw)
  To: mingo; +Cc: hpa, tglx, linux-kernel, ling.ma

From: Ma Ling <ling.ma@intel.com>

Modern CPUs use fast-string instructions to accelerate copy performance
by combining data into 128-bit operations, so we modify the comments and
code style.

Thanks
Ling

---
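For reference, a rough C equivalent (illustrative only, not part of the
patch) of the unrolled .Loop64 body: eight quadword moves per 64-byte
iteration plus a prefetch a few cache lines ahead. __builtin_prefetch
stands in for prefetcht0 here, and the function name and PAGE_SIZE macro
are placeholders:

	#include <stdint.h>
	#include <stddef.h>

	#define PAGE_SIZE 4096

	static void copy_page_unrolled_sketch(void *dst, const void *src)
	{
		uint64_t *d = dst;
		const uint64_t *s = src;
		size_t i;

		for (i = 0; i < PAGE_SIZE / 64; i++) {
			/* roughly prefetcht0 5*64(%rsi): hint 5 lines ahead */
			__builtin_prefetch(s + 5 * 8, 0, 3);

			d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
			d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];

			d += 8;
			s += 8;
		}
	}

The assembly splits the last five iterations into .Loop2 without the
prefetch, presumably so it never prefetches past the end of the source
page; the sketch above glosses over that detail.
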
 arch/x86/lib/copy_page_64.S |  124 +++++++++++++++++++++---------------------
 1 files changed, 62 insertions(+), 62 deletions(-)

diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 6fec2d1..45f7db7 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,97 +2,98 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 
 	ALIGN
-copy_page_c:
+copy_page_rep:
 	CFI_STARTPROC
-	movl $4096/8,%ecx
-	rep movsq
+	movl	$4096/8, %ecx
+	rep	movsq
 	ret
 	CFI_ENDPROC
-ENDPROC(copy_page_c)
+ENDPROC(copy_page_rep)
 
-/* Don't use streaming store because it's better when the target
-   ends up in cache. */
-	    
-/* Could vary the prefetch distance based on SMP/UP */
+/*
+   Don't use streaming copy unless cpu indicate X86_FEATURE_REP_GOOD
+   Could vary the prefetch distance based on SMP/UP
+*/
 
 ENTRY(copy_page)
 	CFI_STARTPROC
-	subq	$3*8,%rsp
+	subq	$3*8, %rsp
 	CFI_ADJUST_CFA_OFFSET 3*8
-	movq	%rbx,(%rsp)
+	movq	%rbx, (%rsp)
 	CFI_REL_OFFSET rbx, 0
-	movq	%r12,1*8(%rsp)
+	movq	%r12, 1*8(%rsp)
 	CFI_REL_OFFSET r12, 1*8
-	movq	%r13,2*8(%rsp)
+	movq	%r13, 2*8(%rsp)
 	CFI_REL_OFFSET r13, 2*8
 
-	movl	$(4096/64)-5,%ecx
+	movl	$(4096/64)-5, %ecx
 	.p2align 4
 .Loop64:
-  	dec     %rcx
+  	dec	%rcx
 
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
 
 	prefetcht0 5*64(%rsi)
 
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
 
-	leaq    64 (%rsi), %rsi
-	leaq    64 (%rdi), %rdi
+	leaq	64 (%rsi), %rsi
+	leaq	64 (%rdi), %rdi
 
-	jnz     .Loop64
+	jnz	.Loop64
 
-	movl	$5,%ecx
+	movl	$5, %ecx
 	.p2align 4
 .Loop2:
-	decl   %ecx
-
-	movq        (%rsi), %rax
-	movq      8 (%rsi), %rbx
-	movq     16 (%rsi), %rdx
-	movq     24 (%rsi), %r8
-	movq     32 (%rsi), %r9
-	movq     40 (%rsi), %r10
-	movq     48 (%rsi), %r11
-	movq     56 (%rsi), %r12
-
-	movq     %rax,    (%rdi)
-	movq     %rbx,  8 (%rdi)
-	movq     %rdx, 16 (%rdi)
-	movq     %r8,  24 (%rdi)
-	movq     %r9,  32 (%rdi)
-	movq     %r10, 40 (%rdi)
-	movq     %r11, 48 (%rdi)
-	movq     %r12, 56 (%rdi)
-
-	leaq	64(%rdi),%rdi
-	leaq	64(%rsi),%rsi
+	decl	%ecx
+
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
+
+	movq	%rax, 0x8*0(%rdi)
+	movq	%rbx, 0x8*1(%rdi)
+	movq	%rdx, 0x8*2(%rdi)
+	movq	%r8,  0x8*3(%rdi)
+	movq	%r9,  0x8*4(%rdi)
+	movq	%r10, 0x8*5(%rdi)
+	movq	%r11, 0x8*6(%rdi)
+	movq	%r12, 0x8*7(%rdi)
+
+	leaq	64(%rdi), %rdi
+	leaq	64(%rsi), %rsi
 
 	jnz	.Loop2
 
-	movq	(%rsp),%rbx
+	movq	(%rsp), %rbx
 	CFI_RESTORE rbx
-	movq	1*8(%rsp),%r12
+	movq	1*8(%rsp), %r12
 	CFI_RESTORE r12
-	movq	2*8(%rsp),%r13
+	movq	2*8(%rsp), %r13
 	CFI_RESTORE r13
-	addq	$3*8,%rsp
+	addq	$3*8, %rsp
 	CFI_ADJUST_CFA_OFFSET -3*8
 	ret
 .Lcopy_page_end:
@@ -102,11 +103,10 @@ ENDPROC(copy_page)
 	/* Some CPUs run faster using the string copy instructions.
 	   It is also a lot simpler. Use this when possible */
 
-#include <asm/cpufeature.h>
 
 	.section .altinstr_replacement,"ax"
 1:	.byte 0xeb					/* jmp <disp8> */
-	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
+	.byte (copy_page_rep - copy_page) - (2f - 1b)	/* offset */
 2:
 	.previous
 	.section .altinstructions,"a"
-- 
1.6.5.2

