* [PATCH] x86-64: modify memcpy()/memset() alternatives mechanism
@ 2009-12-18 16:16 Jan Beulich
From: Jan Beulich @ 2009-12-18 16:16 UTC (permalink / raw)
  To: mingo, tglx, hpa; +Cc: Nick Piggin, linux-kernel

In order to avoid unnecessary chains of branches, patch the
(larger) original function directly, rather than routing
memcpy()/memset() to their alternative implementations via a
patched-in jump.
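
To illustrate, the block being removed (quoted from the hunks below)
shows the old scheme: the "replacement" was merely a two-byte short
jump, so CPUs with REP_GOOD still took a patched-in branch on every
call:

	.section .altinstr_replacement, "ax"
1:	.byte 0xeb				/* jmp <disp8> */
	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
2:
	.previous

With this patch the replacement is the memcpy_c()/memset_c() body
itself, which apply_alternatives() copies straight over the start of
the original function, leaving no branch at all.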

The memcpy() part of this is slightly subtle: while alternative
instruction patching does itself use memcpy(), the replacement
block is less than 64 bytes in size, so the main loop of the
original function doesn't get used for copying memcpy_c() over
memcpy(), and hence we can safely write over its beginning.
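
For reference, a sketch of memcpy()'s entry sequence (paraphrased
from the 2.6.33 memcpy_64.S; only the control flow matters here):

	ENTRY(memcpy)
	movq %rdi, %rax		/* return value: destination */
	movl %edx, %ecx
	shrl $6, %ecx		/* %ecx = number of full 64-byte blocks */
	jz .Lhandle_tail	/* length < 64: skip the unrolled loop */
	...			/* unrolled loop, 64 bytes per iteration */
.Lhandle_tail:
	...			/* small copies, located past the bytes
				   that get overwritten */

A sub-64-byte copy thus branches past the function's beginning
immediately and never returns to it, so overwriting that beginning
mid-copy is safe.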

Also note that the CFI annotations are fine for both variants of each
of the functions.
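
That is because patching happens in place: the replacement bytes land
inside the original function's annotated region and inherit its unwind
information, which is why the replacement blocks can drop their own
CFI_STARTPROC/CFI_ENDPROC (as the hunks below do). A minimal sketch,
with everything but the annotations elided:

	ENTRY(memcpy)
	CFI_STARTPROC		/* still covers these bytes after patching,
				   whether they are the unrolled original
				   or the copied-in memcpy_c() body */
	...
	CFI_ENDPROC
	ENDPROC(memcpy)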

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Nick Piggin <npiggin@suse.de>

---
 arch/x86/lib/memcpy_64.S |   23 ++++++++---------------
 arch/x86/lib/memset_64.S |   18 ++++++------------
 2 files changed, 14 insertions(+), 27 deletions(-)

--- linux-2.6.33-rc1/arch/x86/lib/memcpy_64.S	2009-06-10 05:05:27.000000000 +0200
+++ 2.6.33-rc1-x86_64-memcpy-memset-alternative/arch/x86/lib/memcpy_64.S	2009-11-16 16:10:22.000000000 +0100
@@ -20,12 +20,11 @@
 /*
  * memcpy_c() - fast string ops (REP MOVSQ) based variant.
  *
- * Calls to this get patched into the kernel image via the
+ * This gets patched over the unrolled variant (below) via the
  * alternative instructions framework:
  */
-	ALIGN
-memcpy_c:
-	CFI_STARTPROC
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemcpy_c:
 	movq %rdi, %rax
 
 	movl %edx, %ecx
@@ -35,8 +34,8 @@ memcpy_c:
 	movl %edx, %ecx
 	rep movsb
 	ret
-	CFI_ENDPROC
-ENDPROC(memcpy_c)
+.Lmemcpy_e:
+	.previous
 
 ENTRY(__memcpy)
 ENTRY(memcpy)
@@ -128,16 +127,10 @@ ENDPROC(__memcpy)
 	 * It is also a lot simpler. Use this when possible:
 	 */
 
-	.section .altinstr_replacement, "ax"
-1:	.byte 0xeb				/* jmp <disp8> */
-	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
-2:
-	.previous
-
 	.section .altinstructions, "a"
 	.align 8
 	.quad memcpy
-	.quad 1b
+	.quad .Lmemcpy_c
 	.byte X86_FEATURE_REP_GOOD
 
 	/*
@@ -145,6 +138,6 @@ ENDPROC(__memcpy)
 	 * so it is silly to overwrite itself with nops - reboot is the
 	 * only outcome...
 	 */
-	.byte 2b - 1b
-	.byte 2b - 1b
+	.byte .Lmemcpy_e - .Lmemcpy_c
+	.byte .Lmemcpy_e - .Lmemcpy_c
 	.previous
--- linux-2.6.33-rc1/arch/x86/lib/memset_64.S	2008-01-24 23:58:37.000000000 +0100
+++ 2.6.33-rc1-x86_64-memcpy-memset-alternative/arch/x86/lib/memset_64.S	2009-11-16 16:03:03.000000000 +0100
@@ -12,9 +12,8 @@
  * 
  * rax   original destination
  */	
-	ALIGN
-memset_c:
-	CFI_STARTPROC
+	.section .altinstr_replacement, "ax", @progbits
+.Lmemset_c:
 	movq %rdi,%r9
 	movl %edx,%r8d
 	andl $7,%r8d
@@ -29,8 +28,8 @@ memset_c:
 	rep stosb
 	movq %r9,%rax
 	ret
-	CFI_ENDPROC
-ENDPROC(memset_c)
+.Lmemset_e:
+	.previous
 
 ENTRY(memset)
 ENTRY(__memset)
@@ -118,16 +117,11 @@ ENDPROC(__memset)
 
 #include <asm/cpufeature.h>
 
-	.section .altinstr_replacement,"ax"
-1:	.byte 0xeb				/* jmp <disp8> */
-	.byte (memset_c - memset) - (2f - 1b)	/* offset */
-2:
-	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad memset
-	.quad 1b
+	.quad .Lmemset_c
 	.byte X86_FEATURE_REP_GOOD
 	.byte .Lfinal - memset
-	.byte 2b - 1b
+	.byte .Lmemset_e - .Lmemset_c
 	.previous
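
For reference, each .altinstructions record emitted above matches the
layout of the 2.6.33-era struct alt_instr (from
arch/x86/include/asm/alternative.h); an annotated sketch of the
memset record:

	.section .altinstructions, "a"
	.align 8
	.quad memset			/* instr: address of code to patch */
	.quad .Lmemset_c		/* replacement: bytes to copy in */
	.byte X86_FEATURE_REP_GOOD	/* cpuid: feature bit gating the patch */
	.byte .Lfinal - memset		/* instrlen: size of patched region,
					   remainder gets filled with NOPs */
	.byte .Lmemset_e - .Lmemset_c	/* replacementlen: size of new code */
	.previous

Note the deliberate asymmetry: memset's record spans the whole
function (the rest is NOP-padded), while memcpy's sets instrlen equal
to replacementlen, since memcpy() is itself used while applying
alternatives and must not NOP out code it may still be running, as
the comment in the memcpy hunk explains.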





* [tip:x86/asm] x86-64: Modify memcpy()/memset() alternatives mechanism
From: tip-bot for Jan Beulich @ 2009-12-30 12:22 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, torvalds, npiggin, jbeulich, akpm,
	JBeulich, tglx, mingo

Commit-ID:  7269e8812a59f74fb1ce134465d0bcf5683b93a1
Gitweb:     http://git.kernel.org/tip/7269e8812a59f74fb1ce134465d0bcf5683b93a1
Author:     Jan Beulich <JBeulich@novell.com>
AuthorDate: Fri, 18 Dec 2009 16:16:03 +0000
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 30 Dec 2009 11:57:32 +0100

x86-64: Modify memcpy()/memset() alternatives mechanism

In order to avoid unnecessary chains of branches, patch the
(larger) original function directly, rather than routing
memcpy()/memset() to their alternative implementations via a
patched-in jump.

The memcpy() part of this is slightly subtle: while alternative
instruction patching does itself use memcpy(), the replacement
block is less than 64 bytes in size, so the main loop of the
original function doesn't get used for copying memcpy_c() over
memcpy(), and hence we can safely write over its beginning.

Also note that the CFI annotations are fine for both variants of
each of the functions.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <4B2BB8D30200007800026AF2@vpn.id2.novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>