All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/3] powerpc/mm: Export flush_all_mm()
@ 2017-08-30 10:15 Frederic Barrat
  2017-08-30 10:15 ` [PATCH v2 2/3] cxl: Fix driver use count Frederic Barrat
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Frederic Barrat @ 2017-08-30 10:15 UTC (permalink / raw)
  To: mpe, linuxppc-dev, benh, andrew.donnellan, clombard, vaibhav; +Cc: alistair

With the optimizations introduced by commit a46cc7a90fd8
("powerpc/mm/radix: Improve TLB/PWC flushes"), flush_tlb_mm() no
longer flushes the page walk cache with radix. This patch introduces
flush_all_mm(), which flushes everything, tlb and pwc, for a given mm.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
---
Changelog:
v2: this patch is new

 arch/powerpc/include/asm/book3s/64/tlbflush-hash.h  |  8 ++++++++
 arch/powerpc/include/asm/book3s/64/tlbflush-radix.h |  3 +++
 arch/powerpc/include/asm/book3s/64/tlbflush.h       | 15 +++++++++++++++
 arch/powerpc/mm/tlb-radix.c                         |  6 ++++--
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 2f6373144e2c..c5d89d271a96 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -65,6 +65,14 @@ static inline void hash__flush_tlb_mm(struct mm_struct *mm)
 {
 }
 
+static inline void hash__local_flush_all_mm(struct mm_struct *mm)
+{
+}
+
+static inline void hash__flush_all_mm(struct mm_struct *mm)
+{
+}
+
 static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma,
 					  unsigned long vmaddr)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 9b433a624bf3..af06c6fe8a9f 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -21,17 +21,20 @@ extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long sta
 extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
+extern void radix__local_flush_all_mm(struct mm_struct *mm);
 extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 					      int psize);
 extern void radix__tlb_flush(struct mmu_gather *tlb);
 #ifdef CONFIG_SMP
 extern void radix__flush_tlb_mm(struct mm_struct *mm);
+extern void radix__flush_all_mm(struct mm_struct *mm);
 extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
 extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
 					int psize);
 #else
 #define radix__flush_tlb_mm(mm)		radix__local_flush_tlb_mm(mm)
+#define radix__flush_all_mm(mm)		radix__local_flush_all_mm(mm)
 #define radix__flush_tlb_page(vma,addr)	radix__local_flush_tlb_page(vma,addr)
 #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 72b925f97bab..70760d018bcd 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -57,6 +57,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 	return hash__local_flush_tlb_page(vma, vmaddr);
 }
 
+static inline void local_flush_all_mm(struct mm_struct *mm)
+{
+	if (radix_enabled())
+		return radix__local_flush_all_mm(mm);
+	return hash__local_flush_all_mm(mm);
+}
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	if (radix_enabled())
@@ -79,9 +86,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 		return radix__flush_tlb_page(vma, vmaddr);
 	return hash__flush_tlb_page(vma, vmaddr);
 }
+
+static inline void flush_all_mm(struct mm_struct *mm)
+{
+	if (radix_enabled())
+		return radix__flush_all_mm(mm);
+	return hash__flush_all_mm(mm);
+}
 #else
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
 #define flush_tlb_page(vma, addr)	local_flush_tlb_page(vma, addr)
+#define flush_all_mm(mm)		local_flush_all_mm(mm)
 #endif /* CONFIG_SMP */
 /*
  * flush the page walk cache for the address
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index b3e849c4886e..5a1f46eff3a2 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -144,7 +144,7 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm)
 EXPORT_SYMBOL(radix__local_flush_tlb_mm);
 
 #ifndef CONFIG_SMP
-static void radix__local_flush_all_mm(struct mm_struct *mm)
+void radix__local_flush_all_mm(struct mm_struct *mm)
 {
 	unsigned long pid;
 
@@ -154,6 +154,7 @@ static void radix__local_flush_all_mm(struct mm_struct *mm)
 		_tlbiel_pid(pid, RIC_FLUSH_ALL);
 	preempt_enable();
 }
+EXPORT_SYMBOL(radix__local_flush_all_mm);
 #endif /* CONFIG_SMP */
 
 void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
@@ -200,7 +201,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(radix__flush_tlb_mm);
 
-static void radix__flush_all_mm(struct mm_struct *mm)
+void radix__flush_all_mm(struct mm_struct *mm)
 {
 	unsigned long pid;
 
@@ -216,6 +217,7 @@ static void radix__flush_all_mm(struct mm_struct *mm)
 no_context:
 	preempt_enable();
 }
+EXPORT_SYMBOL(radix__flush_all_mm);
 
 void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
 {
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/3] cxl: Fix driver use count
  2017-08-30 10:15 [PATCH v2 1/3] powerpc/mm: Export flush_all_mm() Frederic Barrat
@ 2017-08-30 10:15 ` Frederic Barrat
  2017-08-30 13:18   ` Michael Ellerman
  2017-08-31 11:36   ` [v2,2/3] " Michael Ellerman
  2017-08-30 10:15 ` [PATCH v2 3/3] cxl: Enable global TLBIs for cxl contexts Frederic Barrat
  2017-08-30 13:17 ` [PATCH v2 1/3] powerpc/mm: Export flush_all_mm() Michael Ellerman
  2 siblings, 2 replies; 10+ messages in thread
From: Frederic Barrat @ 2017-08-30 10:15 UTC (permalink / raw)
  To: mpe, linuxppc-dev, benh, andrew.donnellan, clombard, vaibhav; +Cc: alistair

cxl keeps a driver use count, which is used with the hash memory model
on p8 to know when to upgrade local TLBIs to global and to trigger
callbacks to manage the MMU for PSL8.

If a process opens a context and closes without attaching or fails the
attachment, the driver use count is never decremented. As a
consequence, TLB invalidations remain global, even if there are no
active cxl contexts.

We should increment the driver use count when the process is attaching
to the cxl adapter, and not on open. It's not needed before the
adapter starts using the context and the use count is decremented on
the detach path, so it makes more sense.

It affects only the user api. The kernel api is already doing The
Right Thing.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Cc: stable@vger.kernel.org # v4.2+
Fixes: 7bb5d91a4dda ("cxl: Rework context lifetimes")
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
---
Changelog:
v2: fix typo in comments (Thanks to Andrew)

 drivers/misc/cxl/api.c  | 4 ++++
 drivers/misc/cxl/file.c | 8 +++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 1a138c83f877..a0c44d16bf30 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -336,6 +336,10 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
 			mmput(ctx->mm);
 	}
 
+	/*
+	 * Increment driver use count. Enables global TLBIs for hash
+	 * and callbacks to handle the segment table
+	 */
 	cxl_ctx_get();
 
 	if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 0761271d68c5..4bfad9f6dc9f 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -95,7 +95,6 @@ static int __afu_open(struct inode *inode, struct file *file, bool master)
 
 	pr_devel("afu_open pe: %i\n", ctx->pe);
 	file->private_data = ctx;
-	cxl_ctx_get();
 
 	/* indicate success */
 	rc = 0;
@@ -225,6 +224,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 	if (ctx->mm)
 		mmput(ctx->mm);
 
+	/*
+	 * Increment driver use count. Enables global TLBIs for hash
+	 * and callbacks to handle the segment table
+	 */
+	cxl_ctx_get();
+
 	trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
 
 	if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
@@ -233,6 +238,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 		cxl_adapter_context_put(ctx->afu->adapter);
 		put_pid(ctx->pid);
 		ctx->pid = NULL;
+		cxl_ctx_put();
 		cxl_context_mm_count_put(ctx);
 		goto out;
 	}
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 3/3] cxl: Enable global TLBIs for cxl contexts
  2017-08-30 10:15 [PATCH v2 1/3] powerpc/mm: Export flush_all_mm() Frederic Barrat
  2017-08-30 10:15 ` [PATCH v2 2/3] cxl: Fix driver use count Frederic Barrat
@ 2017-08-30 10:15 ` Frederic Barrat
  2017-08-30 13:17 ` [PATCH v2 1/3] powerpc/mm: Export flush_all_mm() Michael Ellerman
  2 siblings, 0 replies; 10+ messages in thread
From: Frederic Barrat @ 2017-08-30 10:15 UTC (permalink / raw)
  To: mpe, linuxppc-dev, benh, andrew.donnellan, clombard, vaibhav; +Cc: alistair

The PSL and nMMU need to see all TLB invalidations for the memory
contexts used on the adapter. For the hash memory model, it is done by
making all TLBIs global as soon as the cxl driver is in use. For
radix, we need something similar, but we can refine and only convert
to global the invalidations for contexts actually used by the device.

So increment the 'active_cpus' count for the contexts attached to the
cxl adapter. As soon as there's more than 1 active cpu, the TLBIs for
the context become global. Active cpu count must be decremented when
detaching to restore locality if possible and to avoid overflowing the
counter.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
---
Changelog:
v2: Replace flush_tlb_mm() by the new flush_all_mm() to flush the TLBs
and PWCs (thanks to Ben)

 arch/powerpc/include/asm/mmu_context.h | 35 ++++++++++++++++++++++++++++++++++
 arch/powerpc/mm/mmu_context.c          |  9 ---------
 drivers/misc/cxl/api.c                 | 22 ++++++++++++++++++---
 drivers/misc/cxl/context.c             |  3 +++
 drivers/misc/cxl/file.c                | 19 ++++++++++++++++--
 5 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 309592589e30..71a1d01ff206 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -77,6 +77,41 @@ extern void switch_cop(struct mm_struct *next);
 extern int use_cop(unsigned long acop, struct mm_struct *mm);
 extern void drop_cop(unsigned long acop, struct mm_struct *mm);
 
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void inc_mm_active_cpus(struct mm_struct *mm)
+{
+	atomic_inc(&mm->context.active_cpus);
+}
+
+static inline void dec_mm_active_cpus(struct mm_struct *mm)
+{
+	atomic_dec(&mm->context.active_cpus);
+}
+
+static inline void mm_context_add_copro(struct mm_struct *mm)
+{
+	inc_mm_active_cpus(mm);
+}
+
+static inline void mm_context_remove_copro(struct mm_struct *mm)
+{
+	/*
+	 * Need to broadcast a global flush of the full mm before
+	 * decrementing active_cpus count, as the next TLBI may be
+	 * local and the nMMU and/or PSL need to be cleaned up.
+	 * Should be rare enough so that it's acceptable.
+	 */
+	flush_all_mm(mm);
+	dec_mm_active_cpus(mm);
+}
+#else
+static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
+static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
+static inline void mm_context_add_copro(struct mm_struct *mm) { }
+static inline void mm_context_remove_copro(struct mm_struct *mm) { }
+#endif
+
+
 extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			       struct task_struct *tsk);
 
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0f613bc63c50..d60a62bf4fc7 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -34,15 +34,6 @@ static inline void switch_mm_pgdir(struct task_struct *tsk,
 				   struct mm_struct *mm) { }
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
-static inline void inc_mm_active_cpus(struct mm_struct *mm)
-{
-	atomic_inc(&mm->context.active_cpus);
-}
-#else
-static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
-#endif
-
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index a0c44d16bf30..1137a2cc1d3e 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/mount.h>
 #include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
 
 #include "cxl.h"
 
@@ -331,9 +332,12 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
 		/* ensure this mm_struct can't be freed */
 		cxl_context_mm_count_get(ctx);
 
-		/* decrement the use count */
-		if (ctx->mm)
+		if (ctx->mm) {
+			/* decrement the use count from above */
 			mmput(ctx->mm);
+			/* make TLBIs for this context global */
+			mm_context_add_copro(ctx->mm);
+		}
 	}
 
 	/*
@@ -342,13 +346,25 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
 	 */
 	cxl_ctx_get();
 
+	/*
+	 * Barrier is needed to make sure all TLBIs are global before
+	 * we attach and the context starts being used by the adapter.
+	 *
+	 * Needed after mm_context_add_copro() for radix and
+	 * cxl_ctx_get() for hash/p8
+	 */
+	smp_mb();
+
 	if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
 		put_pid(ctx->pid);
 		ctx->pid = NULL;
 		cxl_adapter_context_put(ctx->afu->adapter);
 		cxl_ctx_put();
-		if (task)
+		if (task) {
 			cxl_context_mm_count_put(ctx);
+			if (ctx->mm)
+				mm_context_remove_copro(ctx->mm);
+		}
 		goto out;
 	}
 
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 8c32040b9c09..12a41b2753f0 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
 #include <asm/cputable.h>
 #include <asm/current.h>
 #include <asm/copro.h>
@@ -267,6 +268,8 @@ int __detach_context(struct cxl_context *ctx)
 
 	/* Decrease the mm count on the context */
 	cxl_context_mm_count_put(ctx);
+	if (ctx->mm)
+		mm_context_remove_copro(ctx->mm);
 	ctx->mm = NULL;
 
 	return 0;
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 4bfad9f6dc9f..84b801b5d0e5 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
 #include <asm/cputable.h>
 #include <asm/current.h>
 #include <asm/copro.h>
@@ -220,9 +221,12 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 	/* ensure this mm_struct can't be freed */
 	cxl_context_mm_count_get(ctx);
 
-	/* decrement the use count */
-	if (ctx->mm)
+	if (ctx->mm) {
+		/* decrement the use count from above */
 		mmput(ctx->mm);
+		/* make TLBIs for this context global */
+		mm_context_add_copro(ctx->mm);
+	}
 
 	/*
 	 * Increment driver use count. Enables global TLBIs for hash
@@ -230,6 +234,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 	 */
 	cxl_ctx_get();
 
+	/*
+	 * Barrier is needed to make sure all TLBIs are global before
+	 * we attach and the context starts being used by the adapter.
+	 *
+	 * Needed after mm_context_add_copro() for radix and
+	 * cxl_ctx_get() for hash/p8
+	 */
+	smp_mb();
+
 	trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
 
 	if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
@@ -240,6 +253,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 		ctx->pid = NULL;
 		cxl_ctx_put();
 		cxl_context_mm_count_put(ctx);
+		if (ctx->mm)
+			mm_context_remove_copro(ctx->mm);
 		goto out;
 	}
 
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/3] powerpc/mm: Export flush_all_mm()
  2017-08-30 10:15 [PATCH v2 1/3] powerpc/mm: Export flush_all_mm() Frederic Barrat
  2017-08-30 10:15 ` [PATCH v2 2/3] cxl: Fix driver use count Frederic Barrat
  2017-08-30 10:15 ` [PATCH v2 3/3] cxl: Enable global TLBIs for cxl contexts Frederic Barrat
@ 2017-08-30 13:17 ` Michael Ellerman
  2017-08-30 13:59   ` Frederic Barrat
  2017-08-30 21:00   ` Benjamin Herrenschmidt
  2 siblings, 2 replies; 10+ messages in thread
From: Michael Ellerman @ 2017-08-30 13:17 UTC (permalink / raw)
  To: Frederic Barrat, linuxppc-dev, benh, andrew.donnellan, clombard, vaibhav
  Cc: alistair

Frederic Barrat <fbarrat@linux.vnet.ibm.com> writes:

> With the optimizations introduced by commit a46cc7a90fd8
> ("powerpc/mm/radix: Improve TLB/PWC flushes"), flush_tlb_mm() no
> longer flushes the page walk cache with radix. This patch introduces
> flush_all_mm(), which flushes everything, tlb and pwc, for a given mm.
>
> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> ---
> Changelog:
> v2: this patch is new
>
>  arch/powerpc/include/asm/book3s/64/tlbflush-hash.h  |  8 ++++++++
>  arch/powerpc/include/asm/book3s/64/tlbflush-radix.h |  3 +++
>  arch/powerpc/include/asm/book3s/64/tlbflush.h       | 15 +++++++++++++++
>  arch/powerpc/mm/tlb-radix.c                         |  6 ++++--
>  4 files changed, 30 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
> index 2f6373144e2c..c5d89d271a96 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
> @@ -65,6 +65,14 @@ static inline void hash__flush_tlb_mm(struct mm_struct *mm)
>  {
>  }
>  
> +static inline void hash__local_flush_all_mm(struct mm_struct *mm)
> +{
> +}
> +
> +static inline void hash__flush_all_mm(struct mm_struct *mm)
> +{
> +}

It's not clear why it makes sense for these to be empty. Either for the
general idea of the "flush_all_mm()" API, or for your intended use by
CXL.

cheers

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 2/3] cxl: Fix driver use count
  2017-08-30 10:15 ` [PATCH v2 2/3] cxl: Fix driver use count Frederic Barrat
@ 2017-08-30 13:18   ` Michael Ellerman
  2017-08-31 11:36   ` [v2,2/3] " Michael Ellerman
  1 sibling, 0 replies; 10+ messages in thread
From: Michael Ellerman @ 2017-08-30 13:18 UTC (permalink / raw)
  To: Frederic Barrat, linuxppc-dev, benh, andrew.donnellan, clombard, vaibhav
  Cc: alistair

Frederic Barrat <fbarrat@linux.vnet.ibm.com> writes:

> cxl keeps a driver use count, which is used with the hash memory model
> on p8 to know when to upgrade local TLBIs to global and to trigger
> callbacks to manage the MMU for PSL8.
>
> If a process opens a context and closes without attaching or fails the
> attachment, the driver use count is never decremented. As a
> consequence, TLB invalidations remain global, even if there are no
> active cxl contexts.
>
> We should increment the driver use count when the process is attaching
> to the cxl adapter, and not on open. It's not needed before the
> adapter starts using the context and the use count is decremented on
> the detach path, so it makes more sense.
>
> It affects only the user api. The kernel api is already doing The
> Right Thing.
>
> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> Cc: stable@vger.kernel.org # v4.2+
> Fixes: 7bb5d91a4dda ("cxl: Rework context lifetimes")
> Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

I'll pick this up straight away, because it's a fix.

So you can drop this from the series.

cheers

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/3] powerpc/mm: Export flush_all_mm()
  2017-08-30 13:17 ` [PATCH v2 1/3] powerpc/mm: Export flush_all_mm() Michael Ellerman
@ 2017-08-30 13:59   ` Frederic Barrat
  2017-08-30 21:01     ` Benjamin Herrenschmidt
  2017-08-30 21:00   ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 10+ messages in thread
From: Frederic Barrat @ 2017-08-30 13:59 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev, benh, andrew.donnellan, clombard,
	vaibhav
  Cc: alistair



Le 30/08/2017 à 15:17, Michael Ellerman a écrit :
> Frederic Barrat <fbarrat@linux.vnet.ibm.com> writes:
> 
>> With the optimizations introduced by commit a46cc7a90fd8
>> ("powerpc/mm/radix: Improve TLB/PWC flushes"), flush_tlb_mm() no
>> longer flushes the page walk cache with radix. This patch introduces
>> flush_all_mm(), which flushes everything, tlb and pwc, for a given mm.
>>
>> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
>> ---
>> Changelog:
>> v2: this patch is new
>>
>>   arch/powerpc/include/asm/book3s/64/tlbflush-hash.h  |  8 ++++++++
>>   arch/powerpc/include/asm/book3s/64/tlbflush-radix.h |  3 +++
>>   arch/powerpc/include/asm/book3s/64/tlbflush.h       | 15 +++++++++++++++
>>   arch/powerpc/mm/tlb-radix.c                         |  6 ++++--
>>   4 files changed, 30 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
>> index 2f6373144e2c..c5d89d271a96 100644
>> --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
>> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
>> @@ -65,6 +65,14 @@ static inline void hash__flush_tlb_mm(struct mm_struct *mm)
>>   {
>>   }
>>   
>> +static inline void hash__local_flush_all_mm(struct mm_struct *mm)
>> +{
>> +}
>> +
>> +static inline void hash__flush_all_mm(struct mm_struct *mm)
>> +{
>> +}
> 
> It's not clear why it makes sense for these to be empty. Either for the
> general idea of the "flush_all_mm()" API, or for your intended use by
> CXL.

I was not too sure what to do for hash, but the idea is that the new 
flush_all_mm() is really the equivalent of the old flush_tlb_mm() from 
before Ben's optimizations for radix, and that was/still is an empty 
operation on hash, so I kept it that way.

We don't support hash for capi2 yet. Adding it will definitely require 
some work in that area, as the current approach (use count on the driver 
and all TLBIs becoming global when the driver is in use) won't hold much 
longer.

   Fred

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/3] powerpc/mm: Export flush_all_mm()
  2017-08-30 13:17 ` [PATCH v2 1/3] powerpc/mm: Export flush_all_mm() Michael Ellerman
  2017-08-30 13:59   ` Frederic Barrat
@ 2017-08-30 21:00   ` Benjamin Herrenschmidt
  1 sibling, 0 replies; 10+ messages in thread
From: Benjamin Herrenschmidt @ 2017-08-30 21:00 UTC (permalink / raw)
  To: Michael Ellerman, Frederic Barrat, linuxppc-dev,
	andrew.donnellan, clombard, vaibhav
  Cc: alistair

On Wed, 2017-08-30 at 23:17 +1000, Michael Ellerman wrote:
> It's not clear why it makes sense for these to be empty. Either for the
> general idea of the "flush_all_mm()" API, or for your intended use by
> CXL.

Indeed. On hash we don't have a way to flush a PID out of the TLB,
but you can flush the whole LPID.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/3] powerpc/mm: Export flush_all_mm()
  2017-08-30 13:59   ` Frederic Barrat
@ 2017-08-30 21:01     ` Benjamin Herrenschmidt
  2017-08-31  9:13       ` Frederic Barrat
  0 siblings, 1 reply; 10+ messages in thread
From: Benjamin Herrenschmidt @ 2017-08-30 21:01 UTC (permalink / raw)
  To: Frederic Barrat, Michael Ellerman, linuxppc-dev,
	andrew.donnellan, clombard, vaibhav
  Cc: alistair

On Wed, 2017-08-30 at 15:59 +0200, Frederic Barrat wrote:
> > It's not clear why it makes sense for these to be empty. Either for the
> > general idea of the "flush_all_mm()" API, or for your intended use by
> > CXL.
> 
> I was not too sure what to do for hash, but the idea is that the new 
> flush_all_mm() is really the equivalent of the old flush_tlb_mm() from 
> before Ben's optimizations for radix, and that was/still is an empty 
> operation on hash, so I kept it that way.
> 
> We don't support hash for capi2 yet. Adding it will definitely require 
> some work in that area, as the current approach (use count on the driver 
> and all TLBIs becoming global when the driver is in use) won't hold much 
> longer.

Why not ? It would work fine on hash, but you do need a way to flush
the TLB when decreasing the count indeed and that's missing for hash.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 1/3] powerpc/mm: Export flush_all_mm()
  2017-08-30 21:01     ` Benjamin Herrenschmidt
@ 2017-08-31  9:13       ` Frederic Barrat
  0 siblings, 0 replies; 10+ messages in thread
From: Frederic Barrat @ 2017-08-31  9:13 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Michael Ellerman, linuxppc-dev,
	andrew.donnellan, clombard, vaibhav
  Cc: alistair



Le 30/08/2017 à 23:01, Benjamin Herrenschmidt a écrit :
> On Wed, 2017-08-30 at 15:59 +0200, Frederic Barrat wrote:
>>> It's not clear why it makes sense for these to be empty. Either for the
>>> general idea of the "flush_all_mm()" API, or for your intended use by
>>> CXL.
>>
>> I was not too sure what to do for hash, but the idea is that the new
>> flush_all_mm() is really the equivalent of the old flush_tlb_mm() from
>> before Ben's optimizations for radix, and that was/still is an empty
>> operation on hash, so I kept it that way.
>>
>> We don't support hash for capi2 yet. Adding it will definitely require
>> some work in that area, as the current approach (use count on the driver
>> and all TLBIs becoming global when the driver is in use) won't hold much
>> longer.
> 
> Why not ? It would work fine on hash, but you do need a way to flush
> the TLB when decreasing the count indeed and that's missing for hash.

For the future support of hash, the problem is actually not for capi2, 
but for opencapi, where we can have more than one driver. So the driver 
use count approach would need to be reworked (today the use count is in 
the driver itself). What we do for radix is better, since we make the 
TLBI global only when required, per context. But it is not trivial to do 
the same for hash, as we typically don't have the context handy when we 
send the TLBI. So we'll have to do *something* when we add support for hash.

For my more pressing needs, I'll look into triggering a LPID flush for 
the time being. It's too much but safe, and can be re-assess later when 
we add hash support.

   Fred

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [v2,2/3] cxl: Fix driver use count
  2017-08-30 10:15 ` [PATCH v2 2/3] cxl: Fix driver use count Frederic Barrat
  2017-08-30 13:18   ` Michael Ellerman
@ 2017-08-31 11:36   ` Michael Ellerman
  1 sibling, 0 replies; 10+ messages in thread
From: Michael Ellerman @ 2017-08-31 11:36 UTC (permalink / raw)
  To: Frederic Barrat, linuxppc-dev, benh, andrew.donnellan, clombard, vaibhav
  Cc: alistair

On Wed, 2017-08-30 at 10:15:49 UTC, Frederic Barrat wrote:
> cxl keeps a driver use count, which is used with the hash memory model
> on p8 to know when to upgrade local TLBIs to global and to trigger
> callbacks to manage the MMU for PSL8.
> 
> If a process opens a context and closes without attaching or fails the
> attachment, the driver use count is never decremented. As a
> consequence, TLB invalidations remain global, even if there are no
> active cxl contexts.
> 
> We should increment the driver use count when the process is attaching
> to the cxl adapter, and not on open. It's not needed before the
> adapter starts using the context and the use count is decremented on
> the detach path, so it makes more sense.
> 
> It affects only the user api. The kernel api is already doing The
> Right Thing.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> Cc: stable@vger.kernel.org # v4.2+
> Fixes: 7bb5d91a4dda ("cxl: Rework context lifetimes")
> Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/197267d0356004a31c4d6b6336598f

cheers

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-08-31 11:36 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-30 10:15 [PATCH v2 1/3] powerpc/mm: Export flush_all_mm() Frederic Barrat
2017-08-30 10:15 ` [PATCH v2 2/3] cxl: Fix driver use count Frederic Barrat
2017-08-30 13:18   ` Michael Ellerman
2017-08-31 11:36   ` [v2,2/3] " Michael Ellerman
2017-08-30 10:15 ` [PATCH v2 3/3] cxl: Enable global TLBIs for cxl contexts Frederic Barrat
2017-08-30 13:17 ` [PATCH v2 1/3] powerpc/mm: Export flush_all_mm() Michael Ellerman
2017-08-30 13:59   ` Frederic Barrat
2017-08-30 21:01     ` Benjamin Herrenschmidt
2017-08-31  9:13       ` Frederic Barrat
2017-08-30 21:00   ` Benjamin Herrenschmidt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.