LinuxPPC-Dev Archive on lore.kernel.org
 help / color / Atom feed
* [RFC PATCH] powerpc/64s/radix: introduce option to disable broadcast tlbie
@ 2019-07-31 12:32 Nicholas Piggin
  2019-07-31 13:56 ` Christophe Leroy
  2019-08-14  2:06 ` Michael Ellerman
  0 siblings, 2 replies; 4+ messages in thread
From: Nicholas Piggin @ 2019-07-31 12:32 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Aneesh Kumar K . V, Paul Mackerras, Nicholas Piggin

This is an initial hack of a quick option to disable use of the tlbie
instruction. This takes the simplest possible initial pass of just
replacing low level tlbie functions with IPIs. This means it's not as
performant as it could be if we spend some time optmizing it, but on
the other hand having a 1:1 replacement of tlbie is simple and can be
useful for comparisons so I think it's the right initial approach.

It's not entirely complete, doesn't deal with accelerators (reverts to
tlbie), not all the boot code is converted, kernel space invalidations
not converted, and KVM not converted, also radix only to start with. We
can start to add more cases if this will be useful.

Thanks,
Nick
---
 arch/powerpc/mm/book3s64/radix_tlb.c | 149 +++++++++++++++++++++++++--
 1 file changed, 140 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 71f7fede2fa4..56ceecbd3d5c 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -11,6 +11,7 @@
 #include <linux/mmu_context.h>
 #include <linux/sched/mm.h>
 
+#include <asm/debugfs.h>
 #include <asm/ppc-opcode.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
@@ -285,6 +286,30 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+struct tlbiel_pid {
+	unsigned long pid;
+	unsigned long ric;
+};
+
+static void do_tlbiel_pid(void *info)
+{
+	struct tlbiel_pid *t = info;
+
+	if (t->ric == RIC_FLUSH_TLB)
+		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
+	else if (t->ric == RIC_FLUSH_PWC)
+		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
+	else
+		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_pid_broadcast(const struct cpumask *cpus,
+				unsigned long pid, unsigned long ric)
+{
+	struct tlbiel_pid t = { .pid = pid, .ric = ric };
+	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
+}
+
 static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
 {
 	int set;
@@ -420,6 +445,61 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+struct tlbiel_va {
+	unsigned long pid;
+	unsigned long va;
+	unsigned long psize;
+	unsigned long ric;
+};
+
+static void do_tlbiel_va(void *info)
+{
+	struct tlbiel_va *t = info;
+
+	if (t->ric == RIC_FLUSH_TLB)
+		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
+	else if (t->ric == RIC_FLUSH_PWC)
+		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
+	else
+		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_va_broadcast(const struct cpumask *cpus,
+				unsigned long va, unsigned long pid,
+				unsigned long psize, unsigned long ric)
+{
+	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
+	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
+}
+
+struct tlbiel_va_range {
+	unsigned long pid;
+	unsigned long start;
+	unsigned long end;
+	unsigned long page_size;
+	unsigned long psize;
+	bool also_pwc;
+};
+
+static void do_tlbiel_va_range(void *info)
+{
+	struct tlbiel_va_range *t = info;
+
+	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
+				    t->psize, t->also_pwc);
+}
+
+static inline void _tlbiel_va_range_broadcast(const struct cpumask *cpus,
+				unsigned long start, unsigned long end,
+				unsigned long pid, unsigned long page_size,
+				unsigned long psize, bool also_pwc)
+{
+	struct tlbiel_va_range t = { .start = start, .end = end,
+				.pid = pid, .page_size = page_size,
+				.psize = psize, .also_pwc = also_pwc };
+	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
+}
+
 static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
 			      unsigned long psize, unsigned long ric)
 {
@@ -524,6 +604,12 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
 	return false;
 }
 
+static bool tlbie_enabled = true;
+static bool use_tlbie(void)
+{
+	return tlbie_enabled;
+}
+
 #ifdef CONFIG_SMP
 static void do_exit_flush_lazy_tlb(void *arg)
 {
@@ -582,8 +668,10 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 
 		if (mm_needs_flush_escalation(mm))
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
-		else
+		else if (use_tlbie())
 			_tlbie_pid(pid, RIC_FLUSH_TLB);
+		else
+			_tlbiel_pid_broadcast(mm_cpumask(mm), pid, RIC_FLUSH_TLB);
 	} else {
 local:
 		_tlbiel_pid(pid, RIC_FLUSH_TLB);
@@ -609,7 +697,10 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
 				goto local;
 			}
 		}
-		_tlbie_pid(pid, RIC_FLUSH_ALL);
+		if (mm_needs_flush_escalation(mm) || use_tlbie())
+			_tlbie_pid(pid, RIC_FLUSH_ALL);
+		else
+			_tlbiel_pid_broadcast(mm_cpumask(mm), pid, RIC_FLUSH_ALL);
 	} else {
 local:
 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
@@ -644,7 +735,11 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 			exit_flush_lazy_tlbs(mm);
 			goto local;
 		}
-		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+		if (mm_needs_flush_escalation(mm) || use_tlbie())
+			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+		else
+			_tlbiel_va_broadcast(mm_cpumask(mm),
+				       vmaddr, pid, psize, RIC_FLUSH_TLB);
 	} else {
 local:
 		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -731,8 +826,11 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
 		} else {
 			if (mm_needs_flush_escalation(mm))
 				_tlbie_pid(pid, RIC_FLUSH_ALL);
-			else
+			else if (use_tlbie())
 				_tlbie_pid(pid, RIC_FLUSH_TLB);
+			else
+				_tlbiel_pid_broadcast(mm_cpumask(mm),
+						pid, RIC_FLUSH_TLB);
 		}
 	} else {
 		bool hflush = flush_all_sizes;
@@ -757,8 +855,8 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
 				gflush = false;
 		}
 
-		asm volatile("ptesync": : :"memory");
 		if (local) {
+			asm volatile("ptesync": : :"memory");
 			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
 				__tlbiel_va_range(hstart, hend, pid,
@@ -767,7 +865,8 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
 				__tlbiel_va_range(gstart, gend, pid,
 						PUD_SIZE, MMU_PAGE_1G);
 			asm volatile("ptesync": : :"memory");
-		} else {
+		} else if (use_tlbie()) {
+			asm volatile("ptesync": : :"memory");
 			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
 				__tlbie_va_range(hstart, hend, pid,
@@ -777,6 +876,15 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
 						PUD_SIZE, MMU_PAGE_1G);
 			fixup_tlbie();
 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
+		} else {
+			_tlbiel_va_range_broadcast(mm_cpumask(mm),
+					start, end, pid, page_size, mmu_virtual_psize, false);
+			if (hflush)
+				_tlbiel_va_range_broadcast(mm_cpumask(mm),
+					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
+			if (gflush)
+				_tlbiel_va_range_broadcast(mm_cpumask(mm),
+					gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
 		}
 	}
 	preempt_enable();
@@ -969,13 +1077,22 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
 			if (mm_needs_flush_escalation(mm))
 				also_pwc = true;
 
-			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+			if (use_tlbie())
+				_tlbie_pid(pid,
+					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+			else
+				_tlbiel_pid_broadcast(mm_cpumask(mm), pid,
+					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+
 		}
 	} else {
 		if (local)
 			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
-		else
+		else if (mm_needs_flush_escalation(mm) || use_tlbie())
 			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+		else
+			_tlbiel_va_range_broadcast(mm_cpumask(mm),
+					start, end, pid, page_size, psize, also_pwc);
 	}
 	preempt_enable();
 }
@@ -1017,7 +1134,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
 			exit_flush_lazy_tlbs(mm);
 			goto local;
 		}
-		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+		if (mm_needs_flush_escalation(mm) || use_tlbie())
+			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+		else
+			_tlbiel_va_range_broadcast(mm_cpumask(mm),
+					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 	} else {
 local:
 		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
@@ -1100,3 +1221,13 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+static int __init radix_tlb_setup(void)
+{
+	debugfs_create_bool("tlbie_enabled", 0600,
+			powerpc_debugfs_root,
+			&tlbie_enabled);
+
+	return 0;
+}
+arch_initcall(radix_tlb_setup);
-- 
2.22.0


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH] powerpc/64s/radix: introduce option to disable broadcast tlbie
  2019-07-31 12:32 [RFC PATCH] powerpc/64s/radix: introduce option to disable broadcast tlbie Nicholas Piggin
@ 2019-07-31 13:56 ` Christophe Leroy
  2019-07-31 23:05   ` Nicholas Piggin
  2019-08-14  2:06 ` Michael Ellerman
  1 sibling, 1 reply; 4+ messages in thread
From: Christophe Leroy @ 2019-07-31 13:56 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev; +Cc: Aneesh Kumar K . V, Paul Mackerras



Le 31/07/2019 à 14:32, Nicholas Piggin a écrit :
> This is an initial hack of a quick option to disable use of the tlbie
> instruction. This takes the simplest possible initial pass of just
> replacing low level tlbie functions with IPIs. This means it's not as
> performant as it could be if we spend some time optmizing it, but on
> the other hand having a 1:1 replacement of tlbie is simple and can be
> useful for comparisons so I think it's the right initial approach.

Can you explain why we want to optionnaly disable use of tlbie ?

Christophe

> 
> It's not entirely complete, doesn't deal with accelerators (reverts to
> tlbie), not all the boot code is converted, kernel space invalidations
> not converted, and KVM not converted, also radix only to start with. We
> can start to add more cases if this will be useful.
> 
> Thanks,
> Nick
> ---
>   arch/powerpc/mm/book3s64/radix_tlb.c | 149 +++++++++++++++++++++++++--
>   1 file changed, 140 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
> index 71f7fede2fa4..56ceecbd3d5c 100644
> --- a/arch/powerpc/mm/book3s64/radix_tlb.c
> +++ b/arch/powerpc/mm/book3s64/radix_tlb.c
> @@ -11,6 +11,7 @@
>   #include <linux/mmu_context.h>
>   #include <linux/sched/mm.h>
>   
> +#include <asm/debugfs.h>
>   #include <asm/ppc-opcode.h>
>   #include <asm/tlb.h>
>   #include <asm/tlbflush.h>
> @@ -285,6 +286,30 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
>   	asm volatile("eieio; tlbsync; ptesync": : :"memory");
>   }
>   
> +struct tlbiel_pid {
> +	unsigned long pid;
> +	unsigned long ric;
> +};
> +
> +static void do_tlbiel_pid(void *info)
> +{
> +	struct tlbiel_pid *t = info;
> +
> +	if (t->ric == RIC_FLUSH_TLB)
> +		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
> +	else if (t->ric == RIC_FLUSH_PWC)
> +		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
> +	else
> +		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
> +}
> +
> +static inline void _tlbiel_pid_broadcast(const struct cpumask *cpus,
> +				unsigned long pid, unsigned long ric)
> +{
> +	struct tlbiel_pid t = { .pid = pid, .ric = ric };
> +	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
> +}
> +
>   static inline void _tlbiel_lpid(unsigned long lpid, unsigned long ric)
>   {
>   	int set;
> @@ -420,6 +445,61 @@ static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
>   	asm volatile("eieio; tlbsync; ptesync": : :"memory");
>   }
>   
> +struct tlbiel_va {
> +	unsigned long pid;
> +	unsigned long va;
> +	unsigned long psize;
> +	unsigned long ric;
> +};
> +
> +static void do_tlbiel_va(void *info)
> +{
> +	struct tlbiel_va *t = info;
> +
> +	if (t->ric == RIC_FLUSH_TLB)
> +		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
> +	else if (t->ric == RIC_FLUSH_PWC)
> +		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
> +	else
> +		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
> +}
> +
> +static inline void _tlbiel_va_broadcast(const struct cpumask *cpus,
> +				unsigned long va, unsigned long pid,
> +				unsigned long psize, unsigned long ric)
> +{
> +	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
> +	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
> +}
> +
> +struct tlbiel_va_range {
> +	unsigned long pid;
> +	unsigned long start;
> +	unsigned long end;
> +	unsigned long page_size;
> +	unsigned long psize;
> +	bool also_pwc;
> +};
> +
> +static void do_tlbiel_va_range(void *info)
> +{
> +	struct tlbiel_va_range *t = info;
> +
> +	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
> +				    t->psize, t->also_pwc);
> +}
> +
> +static inline void _tlbiel_va_range_broadcast(const struct cpumask *cpus,
> +				unsigned long start, unsigned long end,
> +				unsigned long pid, unsigned long page_size,
> +				unsigned long psize, bool also_pwc)
> +{
> +	struct tlbiel_va_range t = { .start = start, .end = end,
> +				.pid = pid, .page_size = page_size,
> +				.psize = psize, .also_pwc = also_pwc };
> +	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
> +}
> +
>   static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
>   			      unsigned long psize, unsigned long ric)
>   {
> @@ -524,6 +604,12 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
>   	return false;
>   }
>   
> +static bool tlbie_enabled = true;
> +static bool use_tlbie(void)
> +{
> +	return tlbie_enabled;
> +}
> +
>   #ifdef CONFIG_SMP
>   static void do_exit_flush_lazy_tlb(void *arg)
>   {
> @@ -582,8 +668,10 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
>   
>   		if (mm_needs_flush_escalation(mm))
>   			_tlbie_pid(pid, RIC_FLUSH_ALL);
> -		else
> +		else if (use_tlbie())
>   			_tlbie_pid(pid, RIC_FLUSH_TLB);
> +		else
> +			_tlbiel_pid_broadcast(mm_cpumask(mm), pid, RIC_FLUSH_TLB);
>   	} else {
>   local:
>   		_tlbiel_pid(pid, RIC_FLUSH_TLB);
> @@ -609,7 +697,10 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
>   				goto local;
>   			}
>   		}
> -		_tlbie_pid(pid, RIC_FLUSH_ALL);
> +		if (mm_needs_flush_escalation(mm) || use_tlbie())
> +			_tlbie_pid(pid, RIC_FLUSH_ALL);
> +		else
> +			_tlbiel_pid_broadcast(mm_cpumask(mm), pid, RIC_FLUSH_ALL);
>   	} else {
>   local:
>   		_tlbiel_pid(pid, RIC_FLUSH_ALL);
> @@ -644,7 +735,11 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
>   			exit_flush_lazy_tlbs(mm);
>   			goto local;
>   		}
> -		_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
> +		if (mm_needs_flush_escalation(mm) || use_tlbie())
> +			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
> +		else
> +			_tlbiel_va_broadcast(mm_cpumask(mm),
> +				       vmaddr, pid, psize, RIC_FLUSH_TLB);
>   	} else {
>   local:
>   		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
> @@ -731,8 +826,11 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
>   		} else {
>   			if (mm_needs_flush_escalation(mm))
>   				_tlbie_pid(pid, RIC_FLUSH_ALL);
> -			else
> +			else if (use_tlbie())
>   				_tlbie_pid(pid, RIC_FLUSH_TLB);
> +			else
> +				_tlbiel_pid_broadcast(mm_cpumask(mm),
> +						pid, RIC_FLUSH_TLB);
>   		}
>   	} else {
>   		bool hflush = flush_all_sizes;
> @@ -757,8 +855,8 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
>   				gflush = false;
>   		}
>   
> -		asm volatile("ptesync": : :"memory");
>   		if (local) {
> +			asm volatile("ptesync": : :"memory");
>   			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
>   			if (hflush)
>   				__tlbiel_va_range(hstart, hend, pid,
> @@ -767,7 +865,8 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
>   				__tlbiel_va_range(gstart, gend, pid,
>   						PUD_SIZE, MMU_PAGE_1G);
>   			asm volatile("ptesync": : :"memory");
> -		} else {
> +		} else if (use_tlbie()) {
> +			asm volatile("ptesync": : :"memory");
>   			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
>   			if (hflush)
>   				__tlbie_va_range(hstart, hend, pid,
> @@ -777,6 +876,15 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
>   						PUD_SIZE, MMU_PAGE_1G);
>   			fixup_tlbie();
>   			asm volatile("eieio; tlbsync; ptesync": : :"memory");
> +		} else {
> +			_tlbiel_va_range_broadcast(mm_cpumask(mm),
> +					start, end, pid, page_size, mmu_virtual_psize, false);
> +			if (hflush)
> +				_tlbiel_va_range_broadcast(mm_cpumask(mm),
> +					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
> +			if (gflush)
> +				_tlbiel_va_range_broadcast(mm_cpumask(mm),
> +					gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
>   		}
>   	}
>   	preempt_enable();
> @@ -969,13 +1077,22 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
>   			if (mm_needs_flush_escalation(mm))
>   				also_pwc = true;
>   
> -			_tlbie_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
> +			if (use_tlbie())
> +				_tlbie_pid(pid,
> +					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
> +			else
> +				_tlbiel_pid_broadcast(mm_cpumask(mm), pid,
> +					also_pwc ?  RIC_FLUSH_ALL : RIC_FLUSH_TLB);
> +
>   		}
>   	} else {
>   		if (local)
>   			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
> -		else
> +		else if (mm_needs_flush_escalation(mm) || use_tlbie())
>   			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
> +		else
> +			_tlbiel_va_range_broadcast(mm_cpumask(mm),
> +					start, end, pid, page_size, psize, also_pwc);
>   	}
>   	preempt_enable();
>   }
> @@ -1017,7 +1134,11 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
>   			exit_flush_lazy_tlbs(mm);
>   			goto local;
>   		}
> -		_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
> +		if (mm_needs_flush_escalation(mm) || use_tlbie())
> +			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
> +		else
> +			_tlbiel_va_range_broadcast(mm_cpumask(mm),
> +					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
>   	} else {
>   local:
>   		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
> @@ -1100,3 +1221,13 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
>   }
>   EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
>   #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
> +
> +static int __init radix_tlb_setup(void)
> +{
> +	debugfs_create_bool("tlbie_enabled", 0600,
> +			powerpc_debugfs_root,
> +			&tlbie_enabled);
> +
> +	return 0;
> +}
> +arch_initcall(radix_tlb_setup);
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH] powerpc/64s/radix: introduce option to disable broadcast tlbie
  2019-07-31 13:56 ` Christophe Leroy
@ 2019-07-31 23:05   ` Nicholas Piggin
  0 siblings, 0 replies; 4+ messages in thread
From: Nicholas Piggin @ 2019-07-31 23:05 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev; +Cc: Aneesh Kumar K . V, Paul Mackerras

Christophe Leroy's on July 31, 2019 11:56 pm:
> 
> 
> Le 31/07/2019 à 14:32, Nicholas Piggin a écrit :
>> This is an initial hack of a quick option to disable use of the tlbie
>> instruction. This takes the simplest possible initial pass of just
>> replacing low level tlbie functions with IPIs. This means it's not as
>> performant as it could be if we spend some time optmizing it, but on
>> the other hand having a 1:1 replacement of tlbie is simple and can be
>> useful for comparisons so I think it's the right initial approach.
> 
> Can you explain why we want to optionnaly disable use of tlbie ?

It's something we've wanted to have more control of for a while.

One is for testing performance, especially on large systems it is not
always best to use tlbie. For example if you have threaded apps that
only run on a few CPUs (e.g., process * thread hybrid process model
server).

There is also concern about coherent accelerators may have high
latency to respond to tlbie, which can block the resource for others.

And we did have a hardware errata with early POWER9 it would have
been nice to disable it until the kernel could be updated with the
workaround.

Thanks,
Nick

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC PATCH] powerpc/64s/radix: introduce option to disable broadcast tlbie
  2019-07-31 12:32 [RFC PATCH] powerpc/64s/radix: introduce option to disable broadcast tlbie Nicholas Piggin
  2019-07-31 13:56 ` Christophe Leroy
@ 2019-08-14  2:06 ` Michael Ellerman
  1 sibling, 0 replies; 4+ messages in thread
From: Michael Ellerman @ 2019-08-14  2:06 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev
  Cc: Aneesh Kumar K . V, Paul Mackerras, Nicholas Piggin

Hi Nick,

Just a few comments.

Nicholas Piggin <npiggin@gmail.com> writes:
> diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
> index 71f7fede2fa4..56ceecbd3d5c 100644
> --- a/arch/powerpc/mm/book3s64/radix_tlb.c
> +++ b/arch/powerpc/mm/book3s64/radix_tlb.c
> @@ -285,6 +286,30 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
>  	asm volatile("eieio; tlbsync; ptesync": : :"memory");
>  }
>  
> +struct tlbiel_pid {
> +	unsigned long pid;
> +	unsigned long ric;
> +};
> +
> +static void do_tlbiel_pid(void *info)
> +{
> +	struct tlbiel_pid *t = info;
> +
> +	if (t->ric == RIC_FLUSH_TLB)
> +		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
> +	else if (t->ric == RIC_FLUSH_PWC)
> +		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
> +	else
> +		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
> +}
> +
> +static inline void _tlbiel_pid_broadcast(const struct cpumask *cpus,
> +				unsigned long pid, unsigned long ric)

Can we call these "multicast" instead of "broadcast"?

I think that's more accurate, and avoids confusion with tlbie which
literally does a broadcast (at least architecturally).

> @@ -524,6 +604,12 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
>  	return false;
>  }
>  
> +static bool tlbie_enabled = true;
> +static bool use_tlbie(void)
> +{
> +	return tlbie_enabled;
> +}

No synchronisation, but that's OK. Would probably be good to have
a comment though explaining why.

We could use a static_key but I guess the overhead of a comparison and
branch is in the noise vs the tlbie/tlbiel.

> @@ -1100,3 +1221,13 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
>  }
>  EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
>  #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
> +
> +static int __init radix_tlb_setup(void)
> +{
> +	debugfs_create_bool("tlbie_enabled", 0600,
> +			powerpc_debugfs_root,
> +			&tlbie_enabled);
> +
> +	return 0;
> +}
> +arch_initcall(radix_tlb_setup);

For working around hardware bugs we would want a command line parameter
or other boot time way to flip this. But I guess you're saying because
we haven't converted all uses of tlbie we can't really support that
anyway, and so a runtime switch is sufficient?

cheers

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, back to index

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-31 12:32 [RFC PATCH] powerpc/64s/radix: introduce option to disable broadcast tlbie Nicholas Piggin
2019-07-31 13:56 ` Christophe Leroy
2019-07-31 23:05   ` Nicholas Piggin
2019-08-14  2:06 ` Michael Ellerman

LinuxPPC-Dev Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linuxppc-dev/0 linuxppc-dev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linuxppc-dev linuxppc-dev/ https://lore.kernel.org/linuxppc-dev \
		linuxppc-dev@lists.ozlabs.org linuxppc-dev@ozlabs.org linuxppc-dev@archiver.kernel.org
	public-inbox-index linuxppc-dev


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.ozlabs.lists.linuxppc-dev


AGPL code for this site: git clone https://public-inbox.org/ public-inbox