linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Nicholas Piggin <npiggin@gmail.com>
To: linuxppc-dev@lists.ozlabs.org
Cc: Nicholas Piggin <npiggin@gmail.com>
Subject: [PATCH v3 6/7] powerpc/64s/radix: optimise pte_update
Date: Fri, 25 May 2018 03:58:52 +1000	[thread overview]
Message-ID: <20180524175853.19695-7-npiggin@gmail.com> (raw)
In-Reply-To: <20180524175853.19695-1-npiggin@gmail.com>

Implementing pte_update with pte_xchg (which uses cmpxchg) is
inefficient. A single larx/stcx. works fine, no need for the less
efficient cmpxchg sequence.

Then remove the memory barriers from the operation. There is a
requirement for TLB flushing to load mm_cpumask after the store
that reduces pte permissions, which is moved into the TLB flush
code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 25 +++++++++++-----------
 arch/powerpc/mm/mmu_context.c              |  6 ++++--
 arch/powerpc/mm/tlb-radix.c                | 11 +++++++++-
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 45bf1e1b1d33..cc9437a542cc 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -127,20 +127,21 @@ extern void radix__mark_initmem_nx(void);
 static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 					       unsigned long set)
 {
-	pte_t pte;
-	unsigned long old_pte, new_pte;
-
-	do {
-		pte = READ_ONCE(*ptep);
-		old_pte = pte_val(pte);
-		new_pte = (old_pte | set) & ~clr;
-
-	} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
-
-	return old_pte;
+	__be64 old_be, tmp_be;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3		# pte_update\n"
+	"	andc	%1,%0,%5	\n"
+	"	or	%1,%1,%4	\n"
+	"	stdcx.	%1,0,%3		\n"
+	"	bne-	1b"
+	: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+	: "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
+	: "cc" );
+
+	return be64_to_cpu(old_be);
 }
 
-
 static inline unsigned long radix__pte_update(struct mm_struct *mm,
 					unsigned long addr,
 					pte_t *ptep, unsigned long clr,
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0ab297c4cfad..f84e14f23e50 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -57,8 +57,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 * in switch_slb(), and/or the store of paca->mm_ctx_id in
 		 * copy_mm_to_paca().
 		 *
-		 * On the read side the barrier is in pte_xchg(), which orders
-		 * the store to the PTE vs the load of mm_cpumask.
+		 * On the other side, the barrier is in mm/tlb-radix.c for
+		 * radix which orders earlier stores to clear the PTEs vs
+		 * the load of mm_cpumask. And pte_xchg which does the same
+		 * thing for hash.
 		 *
 		 * This full barrier is needed by membarrier when switching
 		 * between processes after store to rq->curr, before user-space
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 5ac3206c51cc..cdc50398fd60 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -524,6 +524,11 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	/*
+	 * Order loads of mm_cpumask vs previous stores to clear ptes before
+	 * the invalidate. See barrier in switch_mm_irqs_off
+	 */
+	smp_mb();
 	if (!mm_is_thread_local(mm)) {
 		if (mm_needs_flush_escalation(mm))
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
@@ -544,6 +549,7 @@ void radix__flush_all_mm(struct mm_struct *mm)
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (!mm_is_thread_local(mm))
 		_tlbie_pid(pid, RIC_FLUSH_ALL);
 	else
@@ -568,6 +574,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (!mm_is_thread_local(mm))
 		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
 	else
@@ -630,6 +637,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -791,6 +799,7 @@ static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 		return;
 
 	preempt_disable();
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		local = true;
 		full = (end == TLB_FLUSH_ALL ||
@@ -849,7 +858,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 
 	/* Otherwise first do the PWC, then iterate the pages. */
 	preempt_disable();
-
+	smp_mb(); /* see radix__flush_tlb_mm */
 	if (mm_is_thread_local(mm)) {
 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	} else {
-- 
2.17.0

  parent reply	other threads:[~2018-05-24 17:59 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-05-24 17:58 [PATCH v3 0/7] Various TLB and PTE improvements Nicholas Piggin
2018-05-24 17:58 ` [PATCH v3 1/7] powerpc/64s/radix: do not flush TLB when relaxing access Nicholas Piggin
2018-05-24 17:58 ` [PATCH v3 2/7] powerpc/64s/radix: do not flush TLB on spurious fault Nicholas Piggin
2018-05-24 17:58 ` [PATCH v3 3/7] powerpc/64s/radix: make ptep_get_and_clear_full non-atomic for the full case Nicholas Piggin
2018-05-24 17:58 ` [PATCH v3 4/7] powerpc/64s/radix: prefetch user address in update_mmu_cache Nicholas Piggin
2018-05-24 17:58 ` [PATCH v3 5/7] powerpc/64s/radix: avoid ptesync after set_pte and ptep_set_access_flags Nicholas Piggin
2018-05-24 17:58 ` Nicholas Piggin [this message]
2018-05-24 17:58 ` [PATCH v3 7/7] powerpc/64s/radix: flush remote CPUs out of single-threaded mm_cpumask Nicholas Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180524175853.19695-7-npiggin@gmail.com \
    --to=npiggin@gmail.com \
    --cc=linuxppc-dev@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).