linuxppc-dev.lists.ozlabs.org archive mirror
* [RFC PATCH 0/5] powerpc/64s/radix: Use non-atomic ops for PTE
@ 2020-12-19 23:48 Nicholas Piggin
  2020-12-19 23:48 ` [RFC PATCH 1/5] powerpc/64s: update_mmu_cache inline the radix test Nicholas Piggin
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Nicholas Piggin @ 2020-12-19 23:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

This is not entirely settled because of future compatibility concerns,
but in terms of the Linux code and existing processors I think the
approach works.

Thanks,
Nick

Nicholas Piggin (5):
  powerpc/64s: update_mmu_cache inline the radix test
  powerpc/64s: implement mm_nmmu_has_tlbs
  powerpc/64s: add CONFIG_PPC_NMMU for nest MMU support
  powerpc/64s/radix: implement complete radix__ptep_get_and_clear_full
  powerpc/64s/radix: Use non-atomic PTE updates if the MMU does not
    modify the PTE

 arch/powerpc/Kconfig                          |  1 +
 arch/powerpc/include/asm/book3s/64/mmu.h      |  2 +
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 10 +--
 arch/powerpc/include/asm/book3s/64/radix.h    | 50 +++++------
 arch/powerpc/include/asm/book3s/64/tlbflush.h | 11 ++-
 arch/powerpc/include/asm/book3s/pgtable.h     | 11 ++-
 arch/powerpc/include/asm/mmu_context.h        |  5 +-
 arch/powerpc/mm/book3s32/mmu.c                |  4 +-
 arch/powerpc/mm/book3s64/hash_utils.c         |  7 +-
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c  |  3 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c      | 82 +++++++++++++++----
 arch/powerpc/mm/book3s64/radix_tlb.c          | 26 +++---
 arch/powerpc/platforms/Kconfig                |  3 +
 arch/powerpc/platforms/powernv/Kconfig        |  1 +
 14 files changed, 134 insertions(+), 82 deletions(-)

-- 
2.23.0



* [RFC PATCH 1/5] powerpc/64s: update_mmu_cache inline the radix test
  2020-12-19 23:48 [RFC PATCH 0/5] powerpc/64s/radix: Use non-atomic ops for PTE Nicholas Piggin
@ 2020-12-19 23:48 ` Nicholas Piggin
  2020-12-20 11:37   ` Christophe Leroy
  2020-12-19 23:48 ` [RFC PATCH 2/5] powerpc/64s: implement mm_nmmu_has_tlbs Nicholas Piggin
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 10+ messages in thread
From: Nicholas Piggin @ 2020-12-19 23:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

This allows the function to be entirely noped if hash support is
compiled out (not possible yet).
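
As an illustration of where this could go (CONFIG_PPC_64S_HASH_MMU is a
hypothetical symbol here, not something this series adds), the inline
could then collapse to a compile-time no-op on radix-only builds:

  static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
  {
	/* With hash support compiled out, this always returns and the
	 * hash__ call below is eliminated as dead code. */
	if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU) || radix_enabled())
		return;
	hash__update_mmu_cache(vma, address, ptep);
  }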

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/pgtable.h | 11 ++++++++++-
 arch/powerpc/mm/book3s32/mmu.c            |  4 ++--
 arch/powerpc/mm/book3s64/hash_utils.c     |  7 ++-----
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index 0e1263455d73..914e9fc7b069 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -35,7 +35,16 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
  * corresponding HPTE into the hash table ahead of time, instead of
  * waiting for the inevitable extra hash-table miss exception.
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
+
+static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+#ifdef CONFIG_PPC64
+	if (radix_enabled())
+		return;
+#endif
+	hash__update_mmu_cache(vma, address, ptep);
+}
 
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 859e5bd603ac..c5a570ca37ff 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -325,8 +325,8 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea)
  *
  * This must always be called with the pte lock held.
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
-		      pte_t *ptep)
+void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+			    pte_t *ptep)
 {
 	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
 		return;
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 73b06adb6eeb..d52a3dee7cf2 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1667,8 +1667,8 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
  *
  * This must always be called with the pte lock held.
  */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
-		      pte_t *ptep)
+void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+			    pte_t *ptep)
 {
 	/*
 	 * We don't need to worry about _PAGE_PRESENT here because we are
@@ -1677,9 +1677,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 	unsigned long trap;
 	bool is_exec;
 
-	if (radix_enabled())
-		return;
-
 	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
 	if (!pte_young(*ptep) || address >= TASK_SIZE)
 		return;
-- 
2.23.0



* [RFC PATCH 2/5] powerpc/64s: implement mm_nmmu_has_tlbs
  2020-12-19 23:48 [RFC PATCH 0/5] powerpc/64s/radix: Use non-atomic ops for PTE Nicholas Piggin
  2020-12-19 23:48 ` [RFC PATCH 1/5] powerpc/64s: update_mmu_cache inline the radix test Nicholas Piggin
@ 2020-12-19 23:48 ` Nicholas Piggin
  2020-12-19 23:48 ` [RFC PATCH 3/5] powerpc/64s: add CONFIG_PPC_NMMU for nest MMU support Nicholas Piggin
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Nicholas Piggin @ 2020-12-19 23:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Use a helper rather than reading the copros field directly. This allows
coprocessor support to be compiled out.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/tlbflush.h |  9 ++++++-
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c  |  3 +--
 arch/powerpc/mm/book3s64/radix_pgtable.c      |  5 ++--
 arch/powerpc/mm/book3s64/radix_tlb.c          | 26 +++++++++----------
 4 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index dcb5c3839d2f..0a7431e954c6 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -14,6 +14,13 @@ enum {
 	TLB_INVAL_SCOPE_LPID = 1,	/* invalidate TLBs for current LPID */
 };
 
+static inline bool mm_has_nmmu(struct mm_struct *mm)
+{
+	if (unlikely(atomic_read(&mm->context.copros) > 0))
+		return true;
+	return false;
+}
+
 #ifdef CONFIG_PPC_NATIVE
 static inline void tlbiel_all(void)
 {
@@ -143,7 +150,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
 						unsigned long address)
 {
 	/* See ptep_set_access_flags comment */
-	if (atomic_read(&vma->vm_mm->context.copros) > 0)
+	if (mm_has_nmmu(vma->vm_mm))
 		flush_tlb_page(vma, address);
 }
 
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index cb91071eef52..2dfe1416d7db 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -100,8 +100,7 @@ void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 	 * To avoid NMMU hang while relaxing access we need to flush the tlb before
 	 * we set the new value.
 	 */
-	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
-	    (atomic_read(&mm->context.copros) > 0))
+	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && mm_has_nmmu(mm))
 		radix__flush_hugetlb_page(vma, addr);
 
 	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 98f0b243c1ab..9495206b9b91 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1042,7 +1042,7 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 	 * To avoid NMMU hang while relaxing access, we need mark
 	 * the pte invalid in between.
 	 */
-	if ((change & _PAGE_RW) && atomic_read(&mm->context.copros) > 0) {
+	if ((change & _PAGE_RW) && mm_has_nmmu(mm)) {
 		unsigned long old_pte, new_pte;
 
 		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
@@ -1075,8 +1075,7 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
 	 * we set the new value. We need to do this only for radix, because hash
 	 * translation does flush when updating the linux pte.
 	 */
-	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
-	    (atomic_read(&mm->context.copros) > 0))
+	if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) && mm_has_nmmu(mm))
 		radix__flush_tlb_page(vma, addr);
 
 	set_pte_at(mm, addr, ptep, pte);
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 3455947a1b08..6633b47abee8 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -371,7 +371,7 @@ static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
 	 * these paths, so while coprocessors must use tlbie, we can not
 	 * optimise away the tlbiel component.
 	 */
-	if (atomic_read(&mm->context.copros) > 0)
+	if (mm_has_nmmu(mm))
 		_tlbie_pid(pid, RIC_FLUSH_ALL);
 }
 
@@ -504,7 +504,7 @@ static inline void _tlbiel_va_multicast(struct mm_struct *mm,
 	struct cpumask *cpus = mm_cpumask(mm);
 	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
 	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
-	if (atomic_read(&mm->context.copros) > 0)
+	if (mm_has_nmmu(mm))
 		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
 }
 
@@ -558,7 +558,7 @@ static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
 				.psize = psize, .also_pwc = also_pwc };
 
 	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
-	if (atomic_read(&mm->context.copros) > 0)
+	if (mm_has_nmmu(mm))
 		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
 }
 
@@ -634,9 +634,7 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
 	 * caching PTEs and not flushing them properly when
 	 * RIC = 0 for a PID/LPID invalidate
 	 */
-	if (atomic_read(&mm->context.copros) > 0)
-		return true;
-	return false;
+	return mm_has_nmmu(mm);
 }
 
 /*
@@ -759,8 +757,8 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
 		return FLUSH_TYPE_LOCAL;
 	}
 
-	/* Coprocessors require TLBIE to invalidate nMMU. */
-	if (atomic_read(&mm->context.copros) > 0)
+	/* The nest MMU requires TLBIE to invalidate its TLBs. */
+	if (mm_has_nmmu(mm))
 		return FLUSH_TYPE_GLOBAL;
 
 	/*
@@ -833,7 +831,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 		if (!mmu_has_feature(MMU_FTR_GTSE)) {
 			unsigned long tgt = H_RPTI_TARGET_CMMU;
 
-			if (atomic_read(&mm->context.copros) > 0)
+			if (mm_has_nmmu(mm))
 				tgt |= H_RPTI_TARGET_NMMU;
 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
 					       H_RPTI_PAGE_ALL, 0, -1UL);
@@ -870,7 +868,7 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
 			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
 					     H_RPTI_TYPE_PRT;
 
-			if (atomic_read(&mm->context.copros) > 0)
+			if (mm_has_nmmu(mm))
 				tgt |= H_RPTI_TARGET_NMMU;
 			pseries_rpt_invalidate(pid, tgt, type,
 					       H_RPTI_PAGE_ALL, 0, -1UL);
@@ -911,7 +909,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 			pg_sizes = psize_to_rpti_pgsize(psize);
 			size = 1UL << mmu_psize_to_shift(psize);
 
-			if (atomic_read(&mm->context.copros) > 0)
+			if (mm_has_nmmu(mm))
 				tgt |= H_RPTI_TARGET_NMMU;
 			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
 					       pg_sizes, vmaddr,
@@ -1024,7 +1022,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
 
 		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
-		if (atomic_read(&mm->context.copros) > 0)
+		if (mm_has_nmmu(mm))
 			tgt |= H_RPTI_TARGET_NMMU;
 		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
 				       start, end);
@@ -1216,7 +1214,7 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 
 		if (also_pwc)
 			type |= H_RPTI_TYPE_PWC;
-		if (atomic_read(&mm->context.copros) > 0)
+		if (mm_has_nmmu(mm))
 			tgt |= H_RPTI_TARGET_NMMU;
 		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
 	} else if (flush_pid) {
@@ -1293,7 +1291,7 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 			       H_RPTI_TYPE_PRT;
 			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
 
-			if (atomic_read(&mm->context.copros) > 0)
+			if (mm_has_nmmu(mm))
 				tgt |= H_RPTI_TARGET_NMMU;
 			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
 					       addr, end);
-- 
2.23.0



* [RFC PATCH 3/5] powerpc/64s: add CONFIG_PPC_NMMU for nest MMU support
  2020-12-19 23:48 [RFC PATCH 0/5] powerpc/64s/radix: Use non-atomic ops for PTE Nicholas Piggin
  2020-12-19 23:48 ` [RFC PATCH 1/5] powerpc/64s: update_mmu_cache inline the radix test Nicholas Piggin
  2020-12-19 23:48 ` [RFC PATCH 2/5] powerpc/64s: implement mm_nmmu_has_tlbs Nicholas Piggin
@ 2020-12-19 23:48 ` Nicholas Piggin
  2020-12-20 11:43   ` Christophe Leroy
  2020-12-19 23:48 ` [RFC PATCH 4/5] powerpc/64s/radix: implement complete radix__ptep_get_and_clear_full Nicholas Piggin
  2020-12-19 23:48 ` [RFC PATCH 5/5] powerpc/64s/radix: Use non-atomic PTE updates if the MMU does not modify the PTE Nicholas Piggin
  4 siblings, 1 reply; 10+ messages in thread
From: Nicholas Piggin @ 2020-12-19 23:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

This allows some nest MMU features to be compiled away if coprocessor
support is not selected.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/Kconfig                          | 1 +
 arch/powerpc/include/asm/book3s/64/mmu.h      | 2 ++
 arch/powerpc/include/asm/book3s/64/tlbflush.h | 2 ++
 arch/powerpc/include/asm/mmu_context.h        | 5 +++--
 arch/powerpc/platforms/Kconfig                | 3 +++
 arch/powerpc/platforms/powernv/Kconfig        | 1 +
 6 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ae7391627054..4376bf4c53b4 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -888,6 +888,7 @@ config PPC_PROT_SAO_LPAR
 
 config PPC_COPRO_BASE
 	bool
+	select PPC_NMMU if PPC_BOOK3S_64
 
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 995bbcdd0ef8..07850d68a624 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -103,8 +103,10 @@ typedef struct {
 	/* Number of bits in the mm_cpumask */
 	atomic_t active_cpus;
 
+#ifdef CONFIG_PPC_NMMU
 	/* Number of users of the external (Nest) MMU */
 	atomic_t copros;
+#endif
 
 	/* Number of user space windows opened in process mm_context */
 	atomic_t vas_windows;
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 0a7431e954c6..c70a82851f78 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -16,8 +16,10 @@ enum {
 
 static inline bool mm_has_nmmu(struct mm_struct *mm)
 {
+#ifdef CONFIG_PPC_NMMU
 	if (unlikely(atomic_read(&mm->context.copros) > 0))
 		return true;
+#endif
 	return false;
 }
 
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index d5821834dba9..53eac0cc4929 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -143,6 +143,7 @@ static inline void dec_mm_active_cpus(struct mm_struct *mm)
 	atomic_dec(&mm->context.active_cpus);
 }
 
+#ifdef CONFIG_PPC_NMMU
 static inline void mm_context_add_copro(struct mm_struct *mm)
 {
 	/*
@@ -187,6 +188,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
 			dec_mm_active_cpus(mm);
 	}
 }
+#endif
 
 /*
  * vas_windows counter shows number of open windows in the mm
@@ -218,8 +220,7 @@ static inline void mm_context_remove_vas_window(struct mm_struct *mm)
 #else
 static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
 static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
-static inline void mm_context_add_copro(struct mm_struct *mm) { }
-static inline void mm_context_remove_copro(struct mm_struct *mm) { }
+static inline bool mm_has_nmmu(struct mm_struct *mm) { return false; }
 #endif
 
 
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 7a5e8f4541e3..b4b04b3f98d1 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -191,6 +191,9 @@ config PPC_INDIRECT_MMIO
 config PPC_IO_WORKAROUNDS
 	bool
 
+config PPC_NMMU
+	bool
+
 source "drivers/cpufreq/Kconfig"
 
 menu "CPUIdle driver"
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 619b093a0657..145009d74457 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -36,6 +36,7 @@ config PPC_MEMTRACE
 config PPC_VAS
 	bool "IBM Virtual Accelerator Switchboard (VAS)"
 	depends on PPC_POWERNV && PPC_64K_PAGES
+	select PPC_NMMU
 	default y
 	help
 	  This enables support for IBM Virtual Accelerator Switchboard (VAS).
-- 
2.23.0



* [RFC PATCH 4/5] powerpc/64s/radix: implement complete radix__ptep_get_and_clear_full
  2020-12-19 23:48 [RFC PATCH 0/5] powerpc/64s/radix: Use non-atomic ops for PTE Nicholas Piggin
                   ` (2 preceding siblings ...)
  2020-12-19 23:48 ` [RFC PATCH 3/5] powerpc/64s: add CONFIG_PPC_NMMU for nest MMU support Nicholas Piggin
@ 2020-12-19 23:48 ` Nicholas Piggin
  2020-12-19 23:48 ` [RFC PATCH 5/5] powerpc/64s/radix: Use non-atomic PTE updates if the MMU does not modify the PTE Nicholas Piggin
  4 siblings, 0 replies; 10+ messages in thread
From: Nicholas Piggin @ 2020-12-19 23:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

Currently the radix variant is only called when full=true. Allow
radix to take over the entire function.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 +++-------
 arch/powerpc/include/asm/book3s/64/radix.h   |  4 ++++
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 058601efbc8a..410a917c0c60 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -483,14 +483,10 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 					    unsigned long addr,
 					    pte_t *ptep, int full)
 {
-	if (full && radix_enabled()) {
-		/*
-		 * We know that this is a full mm pte clear and
-		 * hence can be sure there is no parallel set_pte.
-		 */
+	if (radix_enabled())
 		return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
-	}
-	return ptep_get_and_clear(mm, addr, ptep);
+	else
+		return ptep_get_and_clear(mm, addr, ptep);
 }
 
 
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index c7813dc628fc..2491f3befda0 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -185,6 +185,10 @@ static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
 	unsigned long old_pte;
 
 	if (full) {
+		/*
+		 * We know that this is a full mm pte clear and
+		 * hence can be sure there is no parallel set_pte.
+		 */
 		old_pte = pte_val(*ptep);
 		*ptep = __pte(0);
 	} else
-- 
2.23.0



* [RFC PATCH 5/5] powerpc/64s/radix: Use non-atomic PTE updates if the MMU does not modify the PTE
  2020-12-19 23:48 [RFC PATCH 0/5] powerpc/64s/radix: Use non-atomic ops for PTE Nicholas Piggin
                   ` (3 preceding siblings ...)
  2020-12-19 23:48 ` [RFC PATCH 4/5] powerpc/64s/radix: implement complete radix__ptep_get_and_clear_full Nicholas Piggin
@ 2020-12-19 23:48 ` Nicholas Piggin
  4 siblings, 0 replies; 10+ messages in thread
From: Nicholas Piggin @ 2020-12-19 23:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

PTE updates performed by Linux are serialized by the page table locks.
There is no need to use atomic operations for these modifications if the
hardware does not perform updates asynchronously (e.g., via the MMU RC
update mechanism).
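
For reference, the atomic update being avoided is the usual
ldarx/stdcx. reservation loop, roughly (a sketch of the existing
__radix_pte_update, abridged):

  static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
						 unsigned long set)
  {
	__be64 old_be, tmp_be;

	__asm__ __volatile__(
	"1:	ldarx	%0,0,%3		# pte_update\n"
	"	andc	%1,%0,%5\n"	/* tmp = old & ~clr */
	"	or	%1,%1,%4\n"	/* tmp |= set */
	"	stdcx.	%1,0,%3\n"	/* store if reservation held */
	"	bne-	1b"
	: "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
	: "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
	: "cc" );

	return be64_to_cpu(old_be);
  }

With no other agent updating the PTE, the reservation and conditional
store buy nothing over a plain load/store done under the page table
lock.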

Current POWER CPUs that support radix do not perform such updates in the
core MMU, but the nest MMU does, so make this conditional on whether the
nest MMU is active for the mm.

This improves a page fault / mprotect microbenchmark by about 10% on
POWER9.

There is a question of how compatibility would work if a future
processor implements core MMU PTE updates. This is being discussed with
ISA developers at the moment...

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 54 ++++++---------
 arch/powerpc/mm/book3s64/radix_pgtable.c   | 79 ++++++++++++++++++----
 2 files changed, 83 insertions(+), 50 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 2491f3befda0..837ed6fb1c7d 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -137,16 +137,17 @@ extern void radix__mark_rodata_ro(void);
 extern void radix__mark_initmem_nx(void);
 #endif
 
-extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
-					 pte_t entry, unsigned long address,
-					 int psize);
-
-extern void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
-					   unsigned long addr, pte_t *ptep,
-					   pte_t old_pte, pte_t pte);
-
-static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
-					       unsigned long set)
+void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+				pte_t entry, unsigned long address, int psize);
+void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+				pte_t old_pte, pte_t pte);
+unsigned long radix__pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+				unsigned long clr, unsigned long set, int huge);
+pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+				int full);
+
+static inline unsigned long __radix_pte_update_atomic(pte_t *ptep, unsigned long clr,
+							unsigned long set)
 {
 	__be64 old_be, tmp_be;
 
@@ -163,38 +164,21 @@ static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 	return be64_to_cpu(old_be);
 }
 
-static inline unsigned long radix__pte_update(struct mm_struct *mm,
-					unsigned long addr,
-					pte_t *ptep, unsigned long clr,
-					unsigned long set,
-					int huge)
+static inline unsigned long __radix_pte_update_nonatomic(pte_t *ptep, unsigned long clr,
+							unsigned long set)
 {
-	unsigned long old_pte;
+	unsigned long old_pte = pte_val(*ptep);
 
-	old_pte = __radix_pte_update(ptep, clr, set);
-	if (!huge)
-		assert_pte_locked(mm, addr);
+	*ptep = __pte((old_pte & ~clr) | set);
 
 	return old_pte;
 }
 
-static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
-						   unsigned long addr,
-						   pte_t *ptep, int full)
+static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
+						unsigned long set)
 {
-	unsigned long old_pte;
-
-	if (full) {
-		/*
-		 * We know that this is a full mm pte clear and
-		 * hence can be sure there is no parallel set_pte.
-		 */
-		old_pte = pte_val(*ptep);
-		*ptep = __pte(0);
-	} else
-		old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
-
-	return __pte(old_pte);
+	/* Must use atomic updates because accelerators may be attached which can modify the pte */
+	return __radix_pte_update_atomic(ptep, clr, set);
 }
 
 static inline int radix__pte_same(pte_t pte_a, pte_t pte_b)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 9495206b9b91..f7ecb90daf87 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1030,6 +1030,47 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline bool radix__mm_has_atomic_pte_update(struct mm_struct *mm)
+{
+	if (mm_has_nmmu(mm))
+		return true;
+	return false;
+}
+
+static inline bool radix__mm_has_core_atomic_pte_update(struct mm_struct *mm)
+{
+	return false;
+}
+
+unsigned long radix__pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+				unsigned long clr, unsigned long set, int huge)
+{
+	unsigned long old_pte;
+
+	if (!huge)
+		assert_pte_locked(mm, addr);
+
+	if (radix__mm_has_atomic_pte_update(mm))
+		old_pte = __radix_pte_update_atomic(ptep, clr, set);
+	else
+		old_pte = __radix_pte_update_nonatomic(ptep, clr, set);
+
+	return old_pte;
+}
+
+pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+				int full)
+{
+	unsigned long old_pte;
+
+	if (full)
+		old_pte = __radix_pte_update_nonatomic(ptep, ~0ul, 0);
+	else
+		old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
+
+	return __pte(old_pte);
+}
+
 void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 				  pte_t entry, unsigned long address, int psize)
 {
@@ -1037,23 +1078,31 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
 					      _PAGE_RW | _PAGE_EXEC);
 
-	unsigned long change = pte_val(entry) ^ pte_val(*ptep);
-	/*
-	 * To avoid NMMU hang while relaxing access, we need mark
-	 * the pte invalid in between.
-	 */
-	if ((change & _PAGE_RW) && mm_has_nmmu(mm)) {
-		unsigned long old_pte, new_pte;
+	if (mm_has_nmmu(mm)) {
+		unsigned long change = pte_val(entry) ^ pte_val(*ptep);
 
-		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
-		/*
-		 * new value of pte
-		 */
-		new_pte = old_pte | set;
-		radix__flush_tlb_page_psize(mm, address, psize);
-		__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
+		if ((change & _PAGE_RW)) {
+			unsigned long old_pte, new_pte;
+
+			/*
+			 * To avoid NMMU hang while relaxing access, we need mark
+			 * the pte invalid in between.
+			 */
+			old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
+			/*
+			 * new value of pte
+			 */
+			new_pte = old_pte | set;
+			radix__flush_tlb_page_psize(mm, address, psize);
+			__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
+		} else {
+			__radix_pte_update(ptep, 0, set);
+		}
 	} else {
-		__radix_pte_update(ptep, 0, set);
+		if (radix__mm_has_core_atomic_pte_update(mm))
+			__radix_pte_update_atomic(ptep, 0, set);
+		else
+			__radix_pte_update_nonatomic(ptep, 0, set);
 		/*
 		 * Book3S does not require a TLB flush when relaxing access
 		 * restrictions when the address space is not attached to a
-- 
2.23.0



* Re: [RFC PATCH 1/5] powerpc/64s: update_mmu_cache inline the radix test
  2020-12-19 23:48 ` [RFC PATCH 1/5] powerpc/64s: update_mmu_cache inline the radix test Nicholas Piggin
@ 2020-12-20 11:37   ` Christophe Leroy
  2020-12-22  3:32     ` Nicholas Piggin
  0 siblings, 1 reply; 10+ messages in thread
From: Christophe Leroy @ 2020-12-20 11:37 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev



On 20/12/2020 00:48, Nicholas Piggin wrote:
> This allows the function to be entirely noped if hash support is
> compiled out (not possible yet).
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/include/asm/book3s/pgtable.h | 11 ++++++++++-
>   arch/powerpc/mm/book3s32/mmu.c            |  4 ++--
>   arch/powerpc/mm/book3s64/hash_utils.c     |  7 ++-----
>   3 files changed, 14 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
> index 0e1263455d73..914e9fc7b069 100644
> --- a/arch/powerpc/include/asm/book3s/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/pgtable.h
> @@ -35,7 +35,16 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
>    * corresponding HPTE into the hash table ahead of time, instead of
>    * waiting for the inevitable extra hash-table miss exception.
>    */
> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
> +void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
> +
> +static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
> +{
> +#ifdef CONFIG_PPC64

You shouldn't need that ifdef. radix_enabled() is always defined.
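
Something like this fallback lives in asm/mmu.h (roughly, from memory):

  #ifdef CONFIG_PPC_RADIX_MMU
  static inline bool radix_enabled(void)
  {
	return mmu_has_feature(MMU_FTR_TYPE_RADIX);
  }
  #else
  static inline bool radix_enabled(void)
  {
	return false;	/* folds the test away on non-radix builds */
  }
  #endif

so on builds without radix the test is constant-false at compile time
and the hash__ call is emitted unconditionally, which is what you want.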

> +	if (radix_enabled())
> +		return;
> +#endif
> +	hash__update_mmu_cache(vma, address, ptep);
> +}
>   
>   #endif /* __ASSEMBLY__ */
>   #endif
> diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
> index 859e5bd603ac..c5a570ca37ff 100644
> --- a/arch/powerpc/mm/book3s32/mmu.c
> +++ b/arch/powerpc/mm/book3s32/mmu.c
> @@ -325,8 +325,8 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea)
>    *
>    * This must always be called with the pte lock held.
>    */
> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
> -		      pte_t *ptep)
> +void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
> +			    pte_t *ptep)

Now the limit is 100 chars per line. This should fit on a single line I think.
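
i.e. something like this, which is well under 100 columns:

  void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)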

>   {
>   	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
>   		return;
> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
> index 73b06adb6eeb..d52a3dee7cf2 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -1667,8 +1667,8 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
>    *
>    * This must always be called with the pte lock held.
>    */
> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
> -		      pte_t *ptep)
> +void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
> +			    pte_t *ptep)

Now the limit is 100 chars per line. This should fit on a single line I think.

>   {
>   	/*
>   	 * We don't need to worry about _PAGE_PRESENT here because we are
> @@ -1677,9 +1677,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
>   	unsigned long trap;
>   	bool is_exec;
>   
> -	if (radix_enabled())
> -		return;
> -
>   	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
>   	if (!pte_young(*ptep) || address >= TASK_SIZE)
>   		return;
> 


* Re: [RFC PATCH 3/5] powerpc/64s: add CONFIG_PPC_NMMU for nest MMU support
  2020-12-19 23:48 ` [RFC PATCH 3/5] powerpc/64s: add CONFIG_PPC_NMMU for nest MMU support Nicholas Piggin
@ 2020-12-20 11:43   ` Christophe Leroy
  2020-12-22  3:37     ` Nicholas Piggin
  0 siblings, 1 reply; 10+ messages in thread
From: Christophe Leroy @ 2020-12-20 11:43 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev



On 20/12/2020 00:48, Nicholas Piggin wrote:
> This allows some nest MMU features to be compiled away if coprocessor
> support is not selected.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/Kconfig                          | 1 +
>   arch/powerpc/include/asm/book3s/64/mmu.h      | 2 ++
>   arch/powerpc/include/asm/book3s/64/tlbflush.h | 2 ++
>   arch/powerpc/include/asm/mmu_context.h        | 5 +++--
>   arch/powerpc/platforms/Kconfig                | 3 +++
>   arch/powerpc/platforms/powernv/Kconfig        | 1 +
>   6 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index ae7391627054..4376bf4c53b4 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -888,6 +888,7 @@ config PPC_PROT_SAO_LPAR
>   
>   config PPC_COPRO_BASE
>   	bool
> +	select PPC_NMMU if PPC_BOOK3S_64
>   
>   config SCHED_SMT
>   	bool "SMT (Hyperthreading) scheduler support"
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index 995bbcdd0ef8..07850d68a624 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -103,8 +103,10 @@ typedef struct {
>   	/* Number of bits in the mm_cpumask */
>   	atomic_t active_cpus;
>   
> +#ifdef CONFIG_PPC_NMMU
>   	/* Number of users of the external (Nest) MMU */
>   	atomic_t copros;
> +#endif
>   
>   	/* Number of user space windows opened in process mm_context */
>   	atomic_t vas_windows;
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
> index 0a7431e954c6..c70a82851f78 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
> @@ -16,8 +16,10 @@ enum {
>   
>   static inline bool mm_has_nmmu(struct mm_struct *mm)
>   {
> +#ifdef CONFIG_PPC_NMMU
>   	if (unlikely(atomic_read(&mm->context.copros) > 0))
>   		return true;
> +#endif
>   	return false;
>   }
>   
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index d5821834dba9..53eac0cc4929 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -143,6 +143,7 @@ static inline void dec_mm_active_cpus(struct mm_struct *mm)
>   	atomic_dec(&mm->context.active_cpus);
>   }
>   
> +#ifdef CONFIG_PPC_NMMU
>   static inline void mm_context_add_copro(struct mm_struct *mm)
>   {
>   	/*
> @@ -187,6 +188,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
>   			dec_mm_active_cpus(mm);
>   	}
>   }
> +#endif
>   
>   /*
>    * vas_windows counter shows number of open windows in the mm
> @@ -218,8 +220,7 @@ static inline void mm_context_remove_vas_window(struct mm_struct *mm)
>   #else
>   static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
>   static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
> -static inline void mm_context_add_copro(struct mm_struct *mm) { }
> -static inline void mm_context_remove_copro(struct mm_struct *mm) { }

Are you sure you can remove those?
If so, I think it belongs in another patch; I can't see how the new PPC_NMMU would allow that by itself.

> +static inline bool mm_has_nmmu(struct mm_struct *mm) { return false; }
>   #endif
>   
>   
> diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
> index 7a5e8f4541e3..b4b04b3f98d1 100644
> --- a/arch/powerpc/platforms/Kconfig
> +++ b/arch/powerpc/platforms/Kconfig
> @@ -191,6 +191,9 @@ config PPC_INDIRECT_MMIO
>   config PPC_IO_WORKAROUNDS
>   	bool
>   
> +config PPC_NMMU
> +	bool
> +
>   source "drivers/cpufreq/Kconfig"
>   
>   menu "CPUIdle driver"
> diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
> index 619b093a0657..145009d74457 100644
> --- a/arch/powerpc/platforms/powernv/Kconfig
> +++ b/arch/powerpc/platforms/powernv/Kconfig
> @@ -36,6 +36,7 @@ config PPC_MEMTRACE
>   config PPC_VAS
>   	bool "IBM Virtual Accelerator Switchboard (VAS)"
>   	depends on PPC_POWERNV && PPC_64K_PAGES
> +	select PPC_NMMU
>   	default y
>   	help
>   	  This enables support for IBM Virtual Accelerator Switchboard (VAS).
> 


* Re: [RFC PATCH 1/5] powerpc/64s: update_mmu_cache inline the radix test
  2020-12-20 11:37   ` Christophe Leroy
@ 2020-12-22  3:32     ` Nicholas Piggin
  0 siblings, 0 replies; 10+ messages in thread
From: Nicholas Piggin @ 2020-12-22  3:32 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev

Excerpts from Christophe Leroy's message of December 20, 2020 9:37 pm:
> 
> 
> On 20/12/2020 00:48, Nicholas Piggin wrote:
>> This allows the function to be entirely noped if hash support is
>> compiled out (not possible yet).
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/include/asm/book3s/pgtable.h | 11 ++++++++++-
>>   arch/powerpc/mm/book3s32/mmu.c            |  4 ++--
>>   arch/powerpc/mm/book3s64/hash_utils.c     |  7 ++-----
>>   3 files changed, 14 insertions(+), 8 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
>> index 0e1263455d73..914e9fc7b069 100644
>> --- a/arch/powerpc/include/asm/book3s/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/pgtable.h
>> @@ -35,7 +35,16 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
>>    * corresponding HPTE into the hash table ahead of time, instead of
>>    * waiting for the inevitable extra hash-table miss exception.
>>    */
>> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
>> +void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
>> +
>> +static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
>> +{
>> +#ifdef CONFIG_PPC64
> 
> You shouldn't need that ifdef. radix_enabled() is always defined.

True, thanks.
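
So with the ifdef dropped the inline simply becomes:

  static inline void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
  {
	if (radix_enabled())
		return;
	hash__update_mmu_cache(vma, address, ptep);
  }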

>> +	if (radix_enabled())
>> +		return;
>> +#endif
>> +	hash__update_mmu_cache(vma, address, ptep);
>> +}
>>   
>>   #endif /* __ASSEMBLY__ */
>>   #endif
>> diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
>> index 859e5bd603ac..c5a570ca37ff 100644
>> --- a/arch/powerpc/mm/book3s32/mmu.c
>> +++ b/arch/powerpc/mm/book3s32/mmu.c
>> @@ -325,8 +325,8 @@ static void hash_preload(struct mm_struct *mm, unsigned long ea)
>>    *
>>    * This must always be called with the pte lock held.
>>    */
>> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
>> -		      pte_t *ptep)
>> +void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
>> +			    pte_t *ptep)
> 
> Now the limit is 100 chars per line. This should fit on a single line I think.

I never quite know what to do here. The Linux limit is 100 but 80 is 
still preferred AFAIK (e.g., don't make lots of lines beyond 80), but 
80-100 can be used in some cases when splitting the line doesn't improve 
readability on 80 columns.

This does (slightly) improve readability.

Thanks,
Nick

> 
>>   {
>>   	if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
>>   		return;
>> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
>> index 73b06adb6eeb..d52a3dee7cf2 100644
>> --- a/arch/powerpc/mm/book3s64/hash_utils.c
>> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
>> @@ -1667,8 +1667,8 @@ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
>>    *
>>    * This must always be called with the pte lock held.
>>    */
>> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
>> -		      pte_t *ptep)
>> +void hash__update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
>> +			    pte_t *ptep)
> 
> Now the limit is 100 chars per line. This should fit on a single line I think.
> 
>>   {
>>   	/*
>>   	 * We don't need to worry about _PAGE_PRESENT here because we are
>> @@ -1677,9 +1677,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
>>   	unsigned long trap;
>>   	bool is_exec;
>>   
>> -	if (radix_enabled())
>> -		return;
>> -
>>   	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
>>   	if (!pte_young(*ptep) || address >= TASK_SIZE)
>>   		return;
>> 
> 


* Re: [RFC PATCH 3/5] powerpc/64s: add CONFIG_PPC_NMMU for nest MMU support
  2020-12-20 11:43   ` Christophe Leroy
@ 2020-12-22  3:37     ` Nicholas Piggin
  0 siblings, 0 replies; 10+ messages in thread
From: Nicholas Piggin @ 2020-12-22  3:37 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev

Excerpts from Christophe Leroy's message of December 20, 2020 9:43 pm:
> 
> 
> On 20/12/2020 00:48, Nicholas Piggin wrote:
>> This allows some nest MMU features to be compiled away if coprocessor
>> support is not selected.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/Kconfig                          | 1 +
>>   arch/powerpc/include/asm/book3s/64/mmu.h      | 2 ++
>>   arch/powerpc/include/asm/book3s/64/tlbflush.h | 2 ++
>>   arch/powerpc/include/asm/mmu_context.h        | 5 +++--
>>   arch/powerpc/platforms/Kconfig                | 3 +++
>>   arch/powerpc/platforms/powernv/Kconfig        | 1 +
>>   6 files changed, 12 insertions(+), 2 deletions(-)
>> 
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index ae7391627054..4376bf4c53b4 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -888,6 +888,7 @@ config PPC_PROT_SAO_LPAR
>>   
>>   config PPC_COPRO_BASE
>>   	bool
>> +	select PPC_NMMU if PPC_BOOK3S_64
>>   
>>   config SCHED_SMT
>>   	bool "SMT (Hyperthreading) scheduler support"
>> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
>> index 995bbcdd0ef8..07850d68a624 100644
>> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
>> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
>> @@ -103,8 +103,10 @@ typedef struct {
>>   	/* Number of bits in the mm_cpumask */
>>   	atomic_t active_cpus;
>>   
>> +#ifdef CONFIG_PPC_NMMU
>>   	/* Number of users of the external (Nest) MMU */
>>   	atomic_t copros;
>> +#endif
>>   
>>   	/* Number of user space windows opened in process mm_context */
>>   	atomic_t vas_windows;
>> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
>> index 0a7431e954c6..c70a82851f78 100644
>> --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
>> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
>> @@ -16,8 +16,10 @@ enum {
>>   
>>   static inline bool mm_has_nmmu(struct mm_struct *mm)
>>   {
>> +#ifdef CONFIG_PPC_NMMU
>>   	if (unlikely(atomic_read(&mm->context.copros) > 0))
>>   		return true;
>> +#endif
>>   	return false;
>>   }
>>   
>> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
>> index d5821834dba9..53eac0cc4929 100644
>> --- a/arch/powerpc/include/asm/mmu_context.h
>> +++ b/arch/powerpc/include/asm/mmu_context.h
>> @@ -143,6 +143,7 @@ static inline void dec_mm_active_cpus(struct mm_struct *mm)
>>   	atomic_dec(&mm->context.active_cpus);
>>   }
>>   
>> +#ifdef CONFIG_PPC_NMMU
>>   static inline void mm_context_add_copro(struct mm_struct *mm)
>>   {
>>   	/*
>> @@ -187,6 +188,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
>>   			dec_mm_active_cpus(mm);
>>   	}
>>   }
>> +#endif
>>   
>>   /*
>>    * vas_windows counter shows number of open windows in the mm
>> @@ -218,8 +220,7 @@ static inline void mm_context_remove_vas_window(struct mm_struct *mm)
>>   #else
>>   static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
>>   static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
>> -static inline void mm_context_add_copro(struct mm_struct *mm) { }
>> -static inline void mm_context_remove_copro(struct mm_struct *mm) { }
> 
> Are you sure you can remove those?
> If so, I think it belongs in another patch; I can't see how the new PPC_NMMU would allow that by itself.

Yeah possibly a separate patch. Nothing except 64s should compile such
code though, I think?

Thanks,
Nick

