linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
To: linux-mm@kvack.org
Cc: linux-mips@linux-mips.org, linux-sh@vger.kernel.org,
	Jan Beulich <jbeulich@novell.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	sparclinux@vger.kernel.org,
	Andrea Arcangeli <aarcange@redhat.com>,
	Andi Kleen <ak@linux.intel.com>,
	Robert Richter <robert.richter@amd.com>,
	x86@kernel.org, Hugh Dickins <hughd@google.com>,
	Ingo Molnar <mingo@redhat.com>, Mel Gorman <mgorman@suse.de>,
	Alex Shi <alex.shu@intel.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Tim Chen <tim.c.chen@linux.intel.com>,
	linux-kernel@vger.kernel.org,
	Andy Lutomirski <luto@amacapital.net>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linuxppc-dev@lists.ozlabs.org,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Subject: [PATCH v3 7/7] x86: switch the 64bit uncached page clear to SSE/AVX v2
Date: Thu, 16 Aug 2012 18:15:54 +0300	[thread overview]
Message-ID: <1345130154-9602-8-git-send-email-kirill.shutemov@linux.intel.com> (raw)
In-Reply-To: <1345130154-9602-1-git-send-email-kirill.shutemov@linux.intel.com>

From: Andi Kleen <ak@linux.intel.com>

With multiple threads vector stores are more efficient, so use them.
This will cause the page clear to run non preemptable and add some
overhead. However on 32bit it was already non preempable (due to
kmap_atomic) and there is an preemption opportunity every 4K unit.

On a NPB (Nasa Parallel Benchmark) 128GB run on a Westmere this improves
the performance regression of enabling transparent huge pages
by ~2% (2.81% to 0.81%), near the runtime variability now.
On a system with AVX support more is expected.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
[kirill.shutemov@linux.intel.com: Properly save/restore arguments]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/lib/clear_page_64.S |   79 ++++++++++++++++++++++++++++++++++--------
 1 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index 9d2f3c2..b302cff 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -73,30 +73,79 @@ ENDPROC(clear_page)
 			     .Lclear_page_end-clear_page,3b-2b
 	.previous
 
+#define SSE_UNROLL 128
+
 /*
  * Zero a page avoiding the caches
  * rdi	page
  */
 ENTRY(clear_page_nocache)
 	CFI_STARTPROC
-	xorl   %eax,%eax
-	movl   $4096/64,%ecx
+	pushq_cfi %rdi
+	call   kernel_fpu_begin
+	popq_cfi  %rdi
+	sub    $16,%rsp
+	CFI_ADJUST_CFA_OFFSET 16
+	movdqu %xmm0,(%rsp)
+	xorpd  %xmm0,%xmm0
+	movl   $4096/SSE_UNROLL,%ecx
 	.p2align 4
 .Lloop_nocache:
 	decl	%ecx
-#define PUT(x) movnti %rax,x*8(%rdi)
-	movnti %rax,(%rdi)
-	PUT(1)
-	PUT(2)
-	PUT(3)
-	PUT(4)
-	PUT(5)
-	PUT(6)
-	PUT(7)
-#undef PUT
-	leaq	64(%rdi),%rdi
+	.set x,0
+	.rept SSE_UNROLL/16
+	movntdq %xmm0,x(%rdi)
+	.set x,x+16
+	.endr
+	leaq	SSE_UNROLL(%rdi),%rdi
 	jnz	.Lloop_nocache
-	nop
-	ret
+	movdqu (%rsp),%xmm0
+	addq   $16,%rsp
+	CFI_ADJUST_CFA_OFFSET -16
+	jmp   kernel_fpu_end
 	CFI_ENDPROC
 ENDPROC(clear_page_nocache)
+
+#ifdef CONFIG_AS_AVX
+
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb					/* jmp <disp8> */
+	.byte (clear_page_nocache_avx - clear_page_nocache) - (2f - 1b)
+	/* offset */
+2:
+	.previous
+	.section .altinstructions,"a"
+	altinstruction_entry clear_page_nocache,1b,X86_FEATURE_AVX,\
+	                     16, 2b-1b
+	.previous
+
+#define AVX_UNROLL 256 /* TUNE ME */
+
+ENTRY(clear_page_nocache_avx)
+	CFI_STARTPROC
+	pushq_cfi %rdi
+	call   kernel_fpu_begin
+	popq_cfi  %rdi
+	sub    $32,%rsp
+	CFI_ADJUST_CFA_OFFSET 32
+	vmovdqu %ymm0,(%rsp)
+	vxorpd  %ymm0,%ymm0,%ymm0
+	movl   $4096/AVX_UNROLL,%ecx
+	.p2align 4
+.Lloop_avx:
+	decl	%ecx
+	.set x,0
+	.rept AVX_UNROLL/32
+	vmovntdq %ymm0,x(%rdi)
+	.set x,x+32
+	.endr
+	leaq	AVX_UNROLL(%rdi),%rdi
+	jnz	.Lloop_avx
+	vmovdqu (%rsp),%ymm0
+	addq   $32,%rsp
+	CFI_ADJUST_CFA_OFFSET -32
+	jmp   kernel_fpu_end
+	CFI_ENDPROC
+ENDPROC(clear_page_nocache_avx)
+
+#endif
-- 
1.7.7.6

      parent reply	other threads:[~2012-08-16 15:16 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-08-16 15:15 [PATCH v3 0/7] Avoid cache trashing on clearing huge/gigantic page Kirill A. Shutemov
2012-08-16 15:15 ` [PATCH v3 1/7] THP: Use real address for NUMA policy Kirill A. Shutemov
2012-08-16 15:15 ` [PATCH v3 2/7] THP: Pass fault address to __do_huge_pmd_anonymous_page() Kirill A. Shutemov
2012-08-16 15:15 ` [PATCH v3 3/7] hugetlb: pass fault address to hugetlb_no_page() Kirill A. Shutemov
2012-08-16 15:15 ` [PATCH v3 4/7] mm: pass fault address to clear_huge_page() Kirill A. Shutemov
2012-08-16 15:15 ` [PATCH v3 5/7] x86: Add clear_page_nocache Kirill A. Shutemov
2012-08-16 15:15 ` [PATCH v3 6/7] mm: make clear_huge_page cache clear only around the fault address Kirill A. Shutemov
2012-08-16 16:16   ` Andrea Arcangeli
2012-08-16 16:43     ` Kirill A. Shutemov
2012-08-16 18:29       ` Andrea Arcangeli
2012-08-16 18:37         ` Kirill A. Shutemov
2012-08-16 19:42           ` Andrea Arcangeli
2012-08-16 15:15 ` Kirill A. Shutemov [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1345130154-9602-8-git-send-email-kirill.shutemov@linux.intel.com \
    --to=kirill.shutemov@linux.intel.com \
    --cc=aarcange@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.shu@intel.com \
    --cc=hannes@cmpxchg.org \
    --cc=hpa@zytor.com \
    --cc=hughd@google.com \
    --cc=jbeulich@novell.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mips@linux-mips.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-sh@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=luto@amacapital.net \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=robert.richter@amd.com \
    --cc=sparclinux@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=tim.c.chen@linux.intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).