All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ben Hutchings <ben@decadent.org.uk>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Denys Vlasenko <dvlasenk@redhat.com>,
	linux-nvdimm@lists.01.org, Peter Zijlstra <peterz@infradead.org>,
	Brian Gerst <brgerst@gmail.com>, "H. Peter Anvin" <hpa@zytor.com>,
	micah.parrish@hpe.com, Andy Lutomirski <luto@amacapital.net>,
	brian.boylston@hpe.com, Ingo Molnar <mingo@kernel.org>,
	Borislav Petkov <bp@alien8.de>, Toshi Kani <toshi.kani@hp.com>,
	akpm@linux-foundation.org, Borislav Petkov <bp@suse.de>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 3.2 23/62] x86/uaccess/64: Make the __copy_user_nocache() assembly code more readable
Date: Tue, 29 Mar 2016 20:18:22 +0100	[thread overview]
Message-ID: <lsq.1459279102.891832774@decadent.org.uk> (raw)
In-Reply-To: <lsq.1459279101.951687763@decadent.org.uk>

3.2.79-rc1 review patch.  If anyone has any objections, please let me know.

------------------

From: Toshi Kani <toshi.kani@hpe.com>

commit ee9737c924706aaa72c2ead93e3ad5644681dc1c upstream.

Add comments to __copy_user_nocache() to clarify its procedures
and alignment requirements.

Also change numeric branch target labels to named local labels.

No code changed:

 arch/x86/lib/copy_user_64.o:

    text    data     bss     dec     hex filename
    1239       0       0    1239     4d7 copy_user_64.o.before
    1239       0       0    1239     4d7 copy_user_64.o.after

 md5:
    58bed94c2db98c1ca9a2d46d0680aaae  copy_user_64.o.before.asm
    58bed94c2db98c1ca9a2d46d0680aaae  copy_user_64.o.after.asm

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: brian.boylston@hpe.com
Cc: dan.j.williams@intel.com
Cc: linux-nvdimm@lists.01.org
Cc: micah.parrish@hpe.com
Cc: ross.zwisler@linux.intel.com
Cc: vishal.l.verma@intel.com
Link: http://lkml.kernel.org/r/1455225857-12039-2-git-send-email-toshi.kani@hpe.com
[ Small readability edits and added object file comparison. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[bwh: Backported to 3.2: adjust filename, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 arch/x86/lib/copy_user_nocache_64.S | 114 ++++++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 41 deletions(-)

--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -44,17 +44,30 @@
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force destination out of cache for more performance.
+ *
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ *  - Require 8-byte alignment when size is 8 bytes or larger.
  */
 ENTRY(__copy_user_nocache)
 	CFI_STARTPROC
+
+	/* If size is less than 8 bytes, go to byte copy */
 	cmpl $8,%edx
-	jb 20f		/* less then 8 bytes, go to byte copy loop */
+	jb .L_1b_cache_copy_entry
+
+	/* If destination is not 8-byte aligned, "cache" copy to align it */
 	ALIGN_DESTINATION
+
+	/* Set 4x8-byte copy count and remainder */
 	movl %edx,%ecx
 	andl $63,%edx
 	shrl $6,%ecx
-	jz 17f
+	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
 1:	movq (%rsi),%r8
 2:	movq 1*8(%rsi),%r9
 3:	movq 2*8(%rsi),%r10
@@ -74,60 +87,79 @@ ENTRY(__copy_user_nocache)
 	leaq 64(%rsi),%rsi
 	leaq 64(%rdi),%rdi
 	decl %ecx
-	jnz 1b
-17:	movl %edx,%ecx
+	jnz .L_4x8b_nocache_copy_loop
+
+	/* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+	movl %edx,%ecx
 	andl $7,%edx
 	shrl $3,%ecx
-	jz 20f
-18:	movq (%rsi),%r8
-19:	movnti %r8,(%rdi)
+	jz .L_1b_cache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20:	movq (%rsi),%r8
+21:	movnti %r8,(%rdi)
 	leaq 8(%rsi),%rsi
 	leaq 8(%rdi),%rdi
 	decl %ecx
-	jnz 18b
-20:	andl %edx,%edx
-	jz 23f
+	jnz .L_8b_nocache_copy_loop
+
+	/* If no byte left, we're done */
+.L_1b_cache_copy_entry:
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* Perform byte "cache" loop-copy for the remainder */
 	movl %edx,%ecx
-21:	movb (%rsi),%al
-22:	movb %al,(%rdi)
+.L_1b_cache_copy_loop:
+40:	movb (%rsi),%al
+41:	movb %al,(%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz 21b
-23:	xorl %eax,%eax
+	jnz .L_1b_cache_copy_loop
+
+	/* Finished copying; fence the prior stores */
+.L_finish_copy:
+	xorl %eax,%eax
 	sfence
 	ret
 
 	.section .fixup,"ax"
-30:	shll $6,%ecx
+.L_fixup_4x8b_copy:
+	shll $6,%ecx
 	addl %ecx,%edx
-	jmp 60f
-40:	lea (%rdx,%rcx,8),%rdx
-	jmp 60f
-50:	movl %ecx,%edx
-60:	sfence
+	jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+	lea (%rdx,%rcx,8),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+	movl %ecx,%edx
+.L_fixup_handle_tail:
+	sfence
 	jmp copy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE(1b,30b)
-	_ASM_EXTABLE(2b,30b)
-	_ASM_EXTABLE(3b,30b)
-	_ASM_EXTABLE(4b,30b)
-	_ASM_EXTABLE(5b,30b)
-	_ASM_EXTABLE(6b,30b)
-	_ASM_EXTABLE(7b,30b)
-	_ASM_EXTABLE(8b,30b)
-	_ASM_EXTABLE(9b,30b)
-	_ASM_EXTABLE(10b,30b)
-	_ASM_EXTABLE(11b,30b)
-	_ASM_EXTABLE(12b,30b)
-	_ASM_EXTABLE(13b,30b)
-	_ASM_EXTABLE(14b,30b)
-	_ASM_EXTABLE(15b,30b)
-	_ASM_EXTABLE(16b,30b)
-	_ASM_EXTABLE(18b,40b)
-	_ASM_EXTABLE(19b,40b)
-	_ASM_EXTABLE(21b,50b)
-	_ASM_EXTABLE(22b,50b)
+	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
+	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
 	CFI_ENDPROC
 ENDPROC(__copy_user_nocache)

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

WARNING: multiple messages have this Message-ID (diff)
From: Ben Hutchings <ben@decadent.org.uk>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: akpm@linux-foundation.org, "Borislav Petkov" <bp@suse.de>,
	"Denys Vlasenko" <dvlasenk@redhat.com>,
	brian.boylston@hpe.com, "Brian Gerst" <brgerst@gmail.com>,
	vishal.l.verma@intel.com, "Borislav Petkov" <bp@alien8.de>,
	linux-nvdimm@ml01.01.org, ross.zwisler@linux.intel.com,
	"Luis R. Rodriguez" <mcgrof@suse.com>,
	"Toshi Kani" <toshi.kani@hpe.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Toshi Kani" <toshi.kani@hp.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Linus Torvalds" <torvalds@linux-foundation.org>,
	dan.j.williams@intel.com, "Andy Lutomirski" <luto@amacapital.net>,
	"Ingo Molnar" <mingo@kernel.org>,
	micah.parrish@hpe.com, "Peter Zijlstra" <peterz@infradead.org>
Subject: [PATCH 3.2 23/62] x86/uaccess/64: Make the __copy_user_nocache() assembly code more readable
Date: Tue, 29 Mar 2016 20:18:22 +0100	[thread overview]
Message-ID: <lsq.1459279102.891832774@decadent.org.uk> (raw)
In-Reply-To: <lsq.1459279101.951687763@decadent.org.uk>

3.2.79-rc1 review patch.  If anyone has any objections, please let me know.

------------------

From: Toshi Kani <toshi.kani@hpe.com>

commit ee9737c924706aaa72c2ead93e3ad5644681dc1c upstream.

Add comments to __copy_user_nocache() to clarify its procedures
and alignment requirements.

Also change numeric branch target labels to named local labels.

No code changed:

 arch/x86/lib/copy_user_64.o:

    text    data     bss     dec     hex filename
    1239       0       0    1239     4d7 copy_user_64.o.before
    1239       0       0    1239     4d7 copy_user_64.o.after

 md5:
    58bed94c2db98c1ca9a2d46d0680aaae  copy_user_64.o.before.asm
    58bed94c2db98c1ca9a2d46d0680aaae  copy_user_64.o.after.asm

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: brian.boylston@hpe.com
Cc: dan.j.williams@intel.com
Cc: linux-nvdimm@lists.01.org
Cc: micah.parrish@hpe.com
Cc: ross.zwisler@linux.intel.com
Cc: vishal.l.verma@intel.com
Link: http://lkml.kernel.org/r/1455225857-12039-2-git-send-email-toshi.kani@hpe.com
[ Small readability edits and added object file comparison. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[bwh: Backported to 3.2: adjust filename, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 arch/x86/lib/copy_user_nocache_64.S | 114 ++++++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 41 deletions(-)

--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -44,17 +44,30 @@
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force destination out of cache for more performance.
+ *
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ *  - Require 8-byte alignment when size is 8 bytes or larger.
  */
 ENTRY(__copy_user_nocache)
 	CFI_STARTPROC
+
+	/* If size is less than 8 bytes, go to byte copy */
 	cmpl $8,%edx
-	jb 20f		/* less then 8 bytes, go to byte copy loop */
+	jb .L_1b_cache_copy_entry
+
+	/* If destination is not 8-byte aligned, "cache" copy to align it */
 	ALIGN_DESTINATION
+
+	/* Set 4x8-byte copy count and remainder */
 	movl %edx,%ecx
 	andl $63,%edx
 	shrl $6,%ecx
-	jz 17f
+	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
 1:	movq (%rsi),%r8
 2:	movq 1*8(%rsi),%r9
 3:	movq 2*8(%rsi),%r10
@@ -74,60 +87,79 @@ ENTRY(__copy_user_nocache)
 	leaq 64(%rsi),%rsi
 	leaq 64(%rdi),%rdi
 	decl %ecx
-	jnz 1b
-17:	movl %edx,%ecx
+	jnz .L_4x8b_nocache_copy_loop
+
+	/* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+	movl %edx,%ecx
 	andl $7,%edx
 	shrl $3,%ecx
-	jz 20f
-18:	movq (%rsi),%r8
-19:	movnti %r8,(%rdi)
+	jz .L_1b_cache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20:	movq (%rsi),%r8
+21:	movnti %r8,(%rdi)
 	leaq 8(%rsi),%rsi
 	leaq 8(%rdi),%rdi
 	decl %ecx
-	jnz 18b
-20:	andl %edx,%edx
-	jz 23f
+	jnz .L_8b_nocache_copy_loop
+
+	/* If no byte left, we're done */
+.L_1b_cache_copy_entry:
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* Perform byte "cache" loop-copy for the remainder */
 	movl %edx,%ecx
-21:	movb (%rsi),%al
-22:	movb %al,(%rdi)
+.L_1b_cache_copy_loop:
+40:	movb (%rsi),%al
+41:	movb %al,(%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz 21b
-23:	xorl %eax,%eax
+	jnz .L_1b_cache_copy_loop
+
+	/* Finished copying; fence the prior stores */
+.L_finish_copy:
+	xorl %eax,%eax
 	sfence
 	ret
 
 	.section .fixup,"ax"
-30:	shll $6,%ecx
+.L_fixup_4x8b_copy:
+	shll $6,%ecx
 	addl %ecx,%edx
-	jmp 60f
-40:	lea (%rdx,%rcx,8),%rdx
-	jmp 60f
-50:	movl %ecx,%edx
-60:	sfence
+	jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+	lea (%rdx,%rcx,8),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+	movl %ecx,%edx
+.L_fixup_handle_tail:
+	sfence
 	jmp copy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE(1b,30b)
-	_ASM_EXTABLE(2b,30b)
-	_ASM_EXTABLE(3b,30b)
-	_ASM_EXTABLE(4b,30b)
-	_ASM_EXTABLE(5b,30b)
-	_ASM_EXTABLE(6b,30b)
-	_ASM_EXTABLE(7b,30b)
-	_ASM_EXTABLE(8b,30b)
-	_ASM_EXTABLE(9b,30b)
-	_ASM_EXTABLE(10b,30b)
-	_ASM_EXTABLE(11b,30b)
-	_ASM_EXTABLE(12b,30b)
-	_ASM_EXTABLE(13b,30b)
-	_ASM_EXTABLE(14b,30b)
-	_ASM_EXTABLE(15b,30b)
-	_ASM_EXTABLE(16b,30b)
-	_ASM_EXTABLE(18b,40b)
-	_ASM_EXTABLE(19b,40b)
-	_ASM_EXTABLE(21b,50b)
-	_ASM_EXTABLE(22b,50b)
+	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
+	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
 	CFI_ENDPROC
 ENDPROC(__copy_user_nocache)

WARNING: multiple messages have this Message-ID (diff)
From: Ben Hutchings <ben@decadent.org.uk>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: akpm@linux-foundation.org, "Borislav Petkov" <bp@suse.de>,
	"Denys Vlasenko" <dvlasenk@redhat.com>,
	brian.boylston@hpe.com, "Brian Gerst" <brgerst@gmail.com>,
	vishal.l.verma@intel.com, "Borislav Petkov" <bp@alien8.de>,
	linux-nvdimm@lists.01.org, ross.zwisler@linux.intel.com,
	"Luis R. Rodriguez" <mcgrof@suse.com>,
	"Toshi Kani" <toshi.kani@hpe.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	"Toshi Kani" <toshi.kani@hp.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Linus Torvalds" <torvalds@linux-foundation.org>,
	dan.j.williams@intel.com, "Andy Lutomirski" <luto@amacapital.net>,
	"Ingo Molnar" <mingo@kernel.org>,
	micah.parrish@hpe.com, "Peter Zijlstra" <peterz@infradead.org>
Subject: [PATCH 3.2 23/62] x86/uaccess/64: Make the __copy_user_nocache() assembly code more readable
Date: Tue, 29 Mar 2016 20:18:22 +0100	[thread overview]
Message-ID: <lsq.1459279102.891832774@decadent.org.uk> (raw)
In-Reply-To: <lsq.1459279101.951687763@decadent.org.uk>

3.2.79-rc1 review patch.  If anyone has any objections, please let me know.

------------------

From: Toshi Kani <toshi.kani@hpe.com>

commit ee9737c924706aaa72c2ead93e3ad5644681dc1c upstream.

Add comments to __copy_user_nocache() to clarify its procedures
and alignment requirements.

Also change numeric branch target labels to named local labels.

No code changed:

 arch/x86/lib/copy_user_64.o:

    text    data     bss     dec     hex filename
    1239       0       0    1239     4d7 copy_user_64.o.before
    1239       0       0    1239     4d7 copy_user_64.o.after

 md5:
    58bed94c2db98c1ca9a2d46d0680aaae  copy_user_64.o.before.asm
    58bed94c2db98c1ca9a2d46d0680aaae  copy_user_64.o.after.asm

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: brian.boylston@hpe.com
Cc: dan.j.williams@intel.com
Cc: linux-nvdimm@lists.01.org
Cc: micah.parrish@hpe.com
Cc: ross.zwisler@linux.intel.com
Cc: vishal.l.verma@intel.com
Link: http://lkml.kernel.org/r/1455225857-12039-2-git-send-email-toshi.kani@hpe.com
[ Small readability edits and added object file comparison. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
[bwh: Backported to 3.2: adjust filename, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 arch/x86/lib/copy_user_nocache_64.S | 114 ++++++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 41 deletions(-)

--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -44,17 +44,30 @@
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force destination out of cache for more performance.
+ *
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ *  - Require 8-byte alignment when size is 8 bytes or larger.
  */
 ENTRY(__copy_user_nocache)
 	CFI_STARTPROC
+
+	/* If size is less than 8 bytes, go to byte copy */
 	cmpl $8,%edx
-	jb 20f		/* less then 8 bytes, go to byte copy loop */
+	jb .L_1b_cache_copy_entry
+
+	/* If destination is not 8-byte aligned, "cache" copy to align it */
 	ALIGN_DESTINATION
+
+	/* Set 4x8-byte copy count and remainder */
 	movl %edx,%ecx
 	andl $63,%edx
 	shrl $6,%ecx
-	jz 17f
+	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
 1:	movq (%rsi),%r8
 2:	movq 1*8(%rsi),%r9
 3:	movq 2*8(%rsi),%r10
@@ -74,60 +87,79 @@ ENTRY(__copy_user_nocache)
 	leaq 64(%rsi),%rsi
 	leaq 64(%rdi),%rdi
 	decl %ecx
-	jnz 1b
-17:	movl %edx,%ecx
+	jnz .L_4x8b_nocache_copy_loop
+
+	/* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+	movl %edx,%ecx
 	andl $7,%edx
 	shrl $3,%ecx
-	jz 20f
-18:	movq (%rsi),%r8
-19:	movnti %r8,(%rdi)
+	jz .L_1b_cache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20:	movq (%rsi),%r8
+21:	movnti %r8,(%rdi)
 	leaq 8(%rsi),%rsi
 	leaq 8(%rdi),%rdi
 	decl %ecx
-	jnz 18b
-20:	andl %edx,%edx
-	jz 23f
+	jnz .L_8b_nocache_copy_loop
+
+	/* If no byte left, we're done */
+.L_1b_cache_copy_entry:
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* Perform byte "cache" loop-copy for the remainder */
 	movl %edx,%ecx
-21:	movb (%rsi),%al
-22:	movb %al,(%rdi)
+.L_1b_cache_copy_loop:
+40:	movb (%rsi),%al
+41:	movb %al,(%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz 21b
-23:	xorl %eax,%eax
+	jnz .L_1b_cache_copy_loop
+
+	/* Finished copying; fence the prior stores */
+.L_finish_copy:
+	xorl %eax,%eax
 	sfence
 	ret
 
 	.section .fixup,"ax"
-30:	shll $6,%ecx
+.L_fixup_4x8b_copy:
+	shll $6,%ecx
 	addl %ecx,%edx
-	jmp 60f
-40:	lea (%rdx,%rcx,8),%rdx
-	jmp 60f
-50:	movl %ecx,%edx
-60:	sfence
+	jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+	lea (%rdx,%rcx,8),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+	movl %ecx,%edx
+.L_fixup_handle_tail:
+	sfence
 	jmp copy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE(1b,30b)
-	_ASM_EXTABLE(2b,30b)
-	_ASM_EXTABLE(3b,30b)
-	_ASM_EXTABLE(4b,30b)
-	_ASM_EXTABLE(5b,30b)
-	_ASM_EXTABLE(6b,30b)
-	_ASM_EXTABLE(7b,30b)
-	_ASM_EXTABLE(8b,30b)
-	_ASM_EXTABLE(9b,30b)
-	_ASM_EXTABLE(10b,30b)
-	_ASM_EXTABLE(11b,30b)
-	_ASM_EXTABLE(12b,30b)
-	_ASM_EXTABLE(13b,30b)
-	_ASM_EXTABLE(14b,30b)
-	_ASM_EXTABLE(15b,30b)
-	_ASM_EXTABLE(16b,30b)
-	_ASM_EXTABLE(18b,40b)
-	_ASM_EXTABLE(19b,40b)
-	_ASM_EXTABLE(21b,50b)
-	_ASM_EXTABLE(22b,50b)
+	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
+	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
 	CFI_ENDPROC
 ENDPROC(__copy_user_nocache)


  parent reply	other threads:[~2016-03-29 20:05 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-29 19:18 [PATCH 3.2 00/62] 3.2.79-rc1 review Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 04/62] wext: fix message delay/ordering Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 37/62] Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin" Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 31/62] ext4: fix bh->b_state corruption Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 08/62] drm/i915: fix error path in intel_setup_gmbus() Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 16/62] ALSA: seq: Fix leak of pool buffer at concurrent writes Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 29/62] USB: option: add "4G LTE usb-modem U901" Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 38/62] jffs2: Fix page lock / f->sem deadlock Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 07/62] nfs: fix nfs_size_to_loff_t Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 58/62] Revert "drm/radeon: call hpd_irq_event on resume" Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 21/62] af_unix: Guard against other == sk in unix_dgram_sendmsg Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 18/62] tracing: Fix freak link error caused by branch tracer Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 44/62] hpfs: don't truncate the file when delete fails Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 13/62] xen/pciback: Check PF instead of VF for PCI_COMMAND_MEMORY Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 06/62] mac80211: fix use of uninitialised values in RX aggregation Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 41/62] ipr: Fix out-of-bounds null overwrite Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 15/62] xen/pcifront: Fix mysterious crashes when NUMA locality information was extracted Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 32/62] kernel/resource.c: fix muxed resource handling in __request_region() Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 45/62] ALSA: timer: Fix broken compat timer user status ioctl Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 53/62] PM / sleep / x86: Fix crash on graph trace through x86 suspend Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 42/62] ipr: Fix regression when loading firmware Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 50/62] ASoC: wm8994: Fix enum ctl accesses in a wrong type Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 26/62] net/mlx4_en: Count HW buffer overrun only once Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 57/62] ubi: Fix out of bounds write in volume update code Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 30/62] Adding Intel Lewisburg device IDs for SATA Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 52/62] ALSA: seq: oss: Don't drain at closing a client Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 28/62] USB: cp210x: add IDs for GE B650V3 and B850V3 boards Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 46/62] ALSA: hdspm: Fix wrong boolean ctl value accesses Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 47/62] ALSA: hdsp: " Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 34/62] mac80211: minstrel_ht: set default tx aggregation timeout to 0 Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 48/62] USB: cp210x: Add ID for Parrot NMEA GPS Flight Recorder Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 43/62] mm: thp: fix SMP race condition between THP page fault and MADV_DONTNEED Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 10/62] s390/dasd: prevent incorrect length error under z/VM after PAV changes Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 54/62] IB/core: Use GRH when the path hop-limit > 0 Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 24/62] x86/uaccess/64: Handle the caching of 4-byte nocache copies properly in __copy_user_nocache() Ben Hutchings
2016-03-29 19:18   ` Ben Hutchings
2016-03-29 19:18   ` Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 56/62] MIPS: traps: Fix SIGFPE information leak from `do_ov' and `do_trap_or_bp' Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 59/62] s390/mm: four page table levels vs. fork Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 22/62] x86, extable: Remove open-coded exception table entries in arch/x86/lib/copy_user_nocache_64.S Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 39/62] Fix directory hardlinks from deleted directories Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 36/62] KVM: async_pf: do not warn on page allocation failures Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 03/62] iommu/vt-d: Fix 64-bit accesses to 32-bit DMAR_GSTS_REG Ben Hutchings
2016-03-29 19:18 ` Ben Hutchings [this message]
2016-03-29 19:18   ` [PATCH 3.2 23/62] x86/uaccess/64: Make the __copy_user_nocache() assembly code more readable Ben Hutchings
2016-03-29 19:18   ` Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 11/62] s390/dasd: fix refcount for PAV reassignment Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 40/62] libata: Align ata_device's id on a cacheline Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 62/62] HID: usbhid: fix recursive deadlock Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 25/62] usb: dwc3: Fix assignment of EP transfer resources Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 61/62] include/linux/poison.h: fix LIST_POISON{1,2} offset Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 01/62] Revert "crypto: algif_skcipher - Do not dereference ctx without socket lock" Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 60/62] Input: aiptek - fix crash on detecting device without endpoints Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 05/62] cfg80211/wext: fix message ordering Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 19/62] ALSA: seq: Fix double port list deletion Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 17/62] tracepoints: Do not trace when cpu is offline Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 51/62] USB: serial: option: add support for Quectel UC20 Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 55/62] mld, igmp: Fix reserved tailroom calculation Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 33/62] can: ems_usb: Fix possible tx overflow Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 27/62] USB: option: add support for SIM7100E Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 12/62] libata: fix HDIO_GET_32BIT ioctl Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 09/62] cifs: fix erroneous return value Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 02/62] crypto: {blk,giv}cipher: Set has_setkey Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 20/62] af_unix: Don't set err in unix_stream_read_generic unless there was an error Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 14/62] xen/pciback: Save the number of MSI-X entries to be copied later Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 35/62] sunrpc/cache: fix off-by-one in qword_get() Ben Hutchings
2016-03-29 19:18 ` [PATCH 3.2 49/62] ASoC: wm8958: Fix enum ctl accesses in a wrong type Ben Hutchings
2016-03-29 20:19 ` [PATCH 3.2 00/62] 3.2.79-rc1 review Ben Hutchings
2016-03-29 20:26 ` Guenter Roeck
2016-03-29 21:03   ` Ben Hutchings

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=lsq.1459279102.891832774@decadent.org.uk \
    --to=ben@decadent.org.uk \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=bp@suse.de \
    --cc=brgerst@gmail.com \
    --cc=brian.boylston@hpe.com \
    --cc=dvlasenk@redhat.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=luto@amacapital.net \
    --cc=micah.parrish@hpe.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=toshi.kani@hp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.