* [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy
@ 2018-04-11  6:38 Alistair Popple
  2018-04-11  6:38 ` [PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters Alistair Popple
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Alistair Popple @ 2018-04-11  6:38 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: mhairgrove, arbab, bsingharora, Alistair Popple

The pnv_npu2_init_context() and pnv_npu2_destroy_context() functions are
used to allocate/free contexts to allow address translation and shootdown
by the NPU on a particular GPU. Context initialisation is implicitly safe
as it is protected by the requirement that mmap_sem be held in write mode.
However, pnv_npu2_destroy_context() does not require mmap_sem to be held,
and it is not safe to call it concurrently with an initialisation for a
different GPU.

It was assumed the driver would ensure destruction was not called
concurrently with initialisation. However, the driver may be simplified by
allowing concurrent initialisation and destruction for different GPUs. As
npu context creation/destruction is not a performance-critical path and the
critical section is not large, a single spinlock is used for simplicity.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---
 arch/powerpc/platforms/powernv/npu-dma.c | 51 ++++++++++++++++++++++++++------
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 1cbef1f9cd37..cb77162f4e7a 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -34,6 +34,12 @@
 #define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
 
 /*
+ * spinlock to protect initialisation of an npu_context for a particular
+ * mm_struct.
+ */
+DEFINE_SPINLOCK(npu_context_lock);
+
+/*
  * Other types of TCE cache invalidation are not functional in the
  * hardware.
  */
@@ -694,7 +700,8 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
  * Returns an error if there no contexts are currently available or a
  * npu_context which should be passed to pnv_npu2_handle_fault().
  *
- * mmap_sem must be held in write mode.
+ * mmap_sem must be held in write mode and must not be called from interrupt
+ * context.
  */
 struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 			unsigned long flags,
@@ -741,7 +748,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	/*
 	 * Setup the NPU context table for a particular GPU. These need to be
 	 * per-GPU as we need the tables to filter ATSDs when there are no
-	 * active contexts on a particular GPU.
+	 * active contexts on a particular GPU. It is safe for these to be
+	 * called concurrently with destroy as the OPAL call takes appropriate
+	 * locks and refcounts on init/destroy.
 	 */
 	rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
 				PCI_DEVID(gpdev->bus->number, gpdev->devfn));
@@ -752,8 +761,19 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	 * We store the npu pci device so we can more easily get at the
 	 * associated npus.
 	 */
+	spin_lock(&npu_context_lock);
 	npu_context = mm->context.npu_context;
+	if (npu_context)
+		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+	spin_unlock(&npu_context_lock);
+
 	if (!npu_context) {
+		/*
+		 * We can set up these fields without holding the
+		 * npu_context_lock as the npu_context hasn't been returned to
+		 * the caller meaning it can't be destroyed. Parallel allocation
+		 * is protected against by mmap_sem.
+		 */
 		rc = -ENOMEM;
 		npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
 		if (npu_context) {
@@ -772,8 +792,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 		}
 
 		mm->context.npu_context = npu_context;
-	} else {
-		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
 	}
 
 	npu_context->release_cb = cb;
@@ -811,15 +829,16 @@ static void pnv_npu2_release_context(struct kref *kref)
 		mm_context_remove_copro(npu_context->mm);
 
 	npu_context->mm->context.npu_context = NULL;
-	mmu_notifier_unregister(&npu_context->mn,
-				npu_context->mm);
-
-	kfree(npu_context);
 }
 
+/*
+ * Destroy a context on the given GPU. May free the npu_context if it is no
+ * longer active on any GPUs. Must not be called from interrupt context.
+ */
 void pnv_npu2_destroy_context(struct npu_context *npu_context,
 			struct pci_dev *gpdev)
 {
+	int removed;
 	struct pnv_phb *nphb;
 	struct npu *npu;
 	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
@@ -841,7 +860,21 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
 	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
 	opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
 				PCI_DEVID(gpdev->bus->number, gpdev->devfn));
-	kref_put(&npu_context->kref, pnv_npu2_release_context);
+	spin_lock(&npu_context_lock);
+	removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
+	spin_unlock(&npu_context_lock);
+
+	/*
+	 * We need to do this outside of pnv_npu2_release_context so that it is
+	 * outside the spinlock as mmu_notifier_destroy uses SRCU.
+	 */
+	if (removed) {
+		mmu_notifier_unregister(&npu_context->mn,
+					npu_context->mm);
+
+		kfree(npu_context);
+	}
+
 }
 EXPORT_SYMBOL(pnv_npu2_destroy_context);
 
-- 
2.11.0
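The locking scheme introduced by this patch reduces to a small pattern: take a
reference on the shared context under a spinlock (kref_get_unless_zero()), drop
the last reference under the same lock (kref_put()), and defer any teardown
that may sleep (here mmu_notifier_unregister(), which uses SRCU) until after
the lock is released. Below is a minimal, self-contained sketch of that
pattern; the names (ctx, shared_ctx, ctx_lock, ctx_get_or_alloc, ctx_put) are
illustrative stand-ins rather than the actual symbols in npu-dma.c, and the
OPAL and mmu_notifier handling is reduced to comments.

#include <linux/bug.h>
#include <linux/err.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct ctx {
	struct kref kref;
	/* ... per-context state ... */
};

static DEFINE_SPINLOCK(ctx_lock);
static struct ctx *shared_ctx;	/* stands in for mm->context.npu_context */

static struct ctx *ctx_get_or_alloc(void)
{
	struct ctx *c;

	spin_lock(&ctx_lock);
	c = shared_ctx;
	if (c)
		/* Existing context: take a reference under the lock so a
		 * concurrent ctx_put() cannot free it underneath us. */
		WARN_ON(!kref_get_unless_zero(&c->kref));
	spin_unlock(&ctx_lock);

	if (!c) {
		/* First user allocates. Parallel allocation is excluded by a
		 * higher-level lock (mmap_sem held for write in npu-dma.c). */
		c = kzalloc(sizeof(*c), GFP_KERNEL);
		if (!c)
			return ERR_PTR(-ENOMEM);
		kref_init(&c->kref);
		shared_ctx = c;
	}

	return c;
}

/* Called by kref_put() with ctx_lock held, so it must not sleep. */
static void ctx_release(struct kref *kref)
{
	shared_ctx = NULL;
}

static void ctx_put(struct ctx *c)
{
	int released;

	spin_lock(&ctx_lock);
	released = kref_put(&c->kref, ctx_release);
	spin_unlock(&ctx_lock);

	/* Work that may sleep (mmu_notifier_unregister() uses SRCU) happens
	 * outside the spinlock, as in pnv_npu2_destroy_context(). */
	if (released)
		kfree(c);
}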


* [PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters
  2018-04-11  6:38 [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy Alistair Popple
@ 2018-04-11  6:38 ` Alistair Popple
  2018-04-13  2:02   ` Mark Hairgrove
  2018-04-20  3:51   ` Alistair Popple
  2018-04-13  2:02 ` [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy Mark Hairgrove
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 9+ messages in thread
From: Alistair Popple @ 2018-04-11  6:38 UTC (permalink / raw)
  To: linuxppc-dev, mpe; +Cc: mhairgrove, arbab, bsingharora, Alistair Popple

There is a single npu context per set of callback parameters. Callers
should be prevented from overwriting the existing callback values, so
instead return an error if different parameters are passed.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---
 arch/powerpc/include/asm/powernv.h       |  2 +-
 arch/powerpc/platforms/powernv/npu-dma.c | 16 +++++++++++++---
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
index dc5f6a5d4575..362ea12a4501 100644
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -15,7 +15,7 @@
 extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
 extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 			unsigned long flags,
-			struct npu_context *(*cb)(struct npu_context *, void *),
+			void (*cb)(struct npu_context *, void *),
 			void *priv);
 extern void pnv_npu2_destroy_context(struct npu_context *context,
 				struct pci_dev *gpdev);
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index cb77162f4e7a..193f43ea3fbc 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -407,7 +407,7 @@ struct npu_context {
 	bool nmmu_flush;
 
 	/* Callback to stop translation requests on a given GPU */
-	struct npu_context *(*release_cb)(struct npu_context *, void *);
+	void (*release_cb)(struct npu_context *context, void *priv);
 
 	/*
 	 * Private pointer passed to the above callback for usage by
@@ -705,7 +705,7 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
  */
 struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 			unsigned long flags,
-			struct npu_context *(*cb)(struct npu_context *, void *),
+			void (*cb)(struct npu_context *, void *),
 			void *priv)
 {
 	int rc;
@@ -763,8 +763,18 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	 */
 	spin_lock(&npu_context_lock);
 	npu_context = mm->context.npu_context;
-	if (npu_context)
+	if (npu_context) {
+		if (npu_context->release_cb != cb ||
+			npu_context->priv != priv) {
+			spin_unlock(&npu_context_lock);
+			opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+						PCI_DEVID(gpdev->bus->number,
+							gpdev->devfn));
+			return ERR_PTR(-EINVAL);
+		}
+
 		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+	}
 	spin_unlock(&npu_context_lock);
 
 	if (!npu_context) {
-- 
2.11.0
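With this change the release callback returns void, and pnv_npu2_init_context()
can now return ERR_PTR(-EINVAL) when a context already exists for the mm but
was registered with different cb/priv values. A hypothetical caller is sketched
below; the driver-side function and variable names (my_stop_translations,
my_bind_gpu) are invented for illustration, and only the pnv_npu2_* interfaces
come from this series.

#include <linux/err.h>
#include <linux/pci.h>
#include <asm/powernv.h>

/* Hypothetical callback matching the new void-returning prototype. */
static void my_stop_translations(struct npu_context *context, void *priv)
{
	/* Tell the device to stop issuing translation requests. */
}

static int my_bind_gpu(struct pci_dev *gpdev, unsigned long flags, void *priv)
{
	struct npu_context *ctx;

	/* The caller must hold the mm's mmap_sem in write mode, as
	 * documented in npu-dma.c. */
	ctx = pnv_npu2_init_context(gpdev, flags, my_stop_translations, priv);
	if (IS_ERR(ctx))
		/* -EINVAL now indicates an existing context for this mm with
		 * different callback parameters. */
		return PTR_ERR(ctx);

	/* ... hand ctx to fault handling, e.g. pnv_npu2_handle_fault() ... */

	pnv_npu2_destroy_context(ctx, gpdev);
	return 0;
}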


* Re: [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy
  2018-04-11  6:38 [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy Alistair Popple
  2018-04-11  6:38 ` [PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters Alistair Popple
@ 2018-04-13  2:02 ` Mark Hairgrove
  2018-04-13 11:18 ` Balbir Singh
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Mark Hairgrove @ 2018-04-13  2:02 UTC (permalink / raw)
  To: Alistair Popple; +Cc: linuxppc-dev, mpe, arbab, bsingharora



On Wed, 11 Apr 2018, Alistair Popple wrote:

> The pnv_npu2_init_context() and pnv_npu2_destroy_context() functions are
> used to allocate/free contexts to allow address translation and shootdown
> by the NPU on a particular GPU. Context initialisation is implicitly safe
> as it is protected by the requirement mmap_sem be held in write mode,
> however pnv_npu2_destroy_context() does not require mmap_sem to be held and
> it is not safe to call with a concurrent initialisation for a different
> GPU.
> 
> It was assumed the driver would ensure destruction was not called
> concurrently with initialisation. However the driver may be simplified by
> allowing concurrent initialisation and destruction for different GPUs. As
> npu context creation/destruction is not a performance critical path and the
> critical section is not large a single spinlock is used for simplicity.
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
>  arch/powerpc/platforms/powernv/npu-dma.c | 51 ++++++++++++++++++++++++++------
>  1 file changed, 42 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
> index 1cbef1f9cd37..cb77162f4e7a 100644
> --- a/arch/powerpc/platforms/powernv/npu-dma.c
> +++ b/arch/powerpc/platforms/powernv/npu-dma.c
> @@ -34,6 +34,12 @@
>  #define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
>  
>  /*
> + * spinlock to protect initialisation of an npu_context for a particular
> + * mm_struct.
> + */
> +DEFINE_SPINLOCK(npu_context_lock);

static DEFINE_SPINLOCK

> +
> +/*
>   * Other types of TCE cache invalidation are not functional in the
>   * hardware.
>   */
> @@ -694,7 +700,8 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
>   * Returns an error if there no contexts are currently available or a
>   * npu_context which should be passed to pnv_npu2_handle_fault().
>   *
> - * mmap_sem must be held in write mode.
> + * mmap_sem must be held in write mode and must not be called from interrupt
> + * context.
>   */
>  struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
>  			unsigned long flags,
> @@ -741,7 +748,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
>  	/*
>  	 * Setup the NPU context table for a particular GPU. These need to be
>  	 * per-GPU as we need the tables to filter ATSDs when there are no
> -	 * active contexts on a particular GPU.
> +	 * active contexts on a particular GPU. It is safe for these to be
> +	 * called concurrently with destroy as the OPAL call takes appropriate
> +	 * locks and refcounts on init/destroy.
>  	 */
>  	rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
>  				PCI_DEVID(gpdev->bus->number, gpdev->devfn));
> @@ -752,8 +761,19 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
>  	 * We store the npu pci device so we can more easily get at the
>  	 * associated npus.
>  	 */
> +	spin_lock(&npu_context_lock);
>  	npu_context = mm->context.npu_context;
> +	if (npu_context)
> +		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
> +	spin_unlock(&npu_context_lock);
> +
>  	if (!npu_context) {
> +		/*
> +		 * We can set up these fields without holding the
> +		 * npu_context_lock as the npu_context hasn't been returned to
> +		 * the caller meaning it can't be destroyed. Parallel allocation
> +		 * is protected against by mmap_sem.
> +		 */
>  		rc = -ENOMEM;
>  		npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
>  		if (npu_context) {
> @@ -772,8 +792,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
>  		}
>  
>  		mm->context.npu_context = npu_context;
> -	} else {
> -		WARN_ON(!kref_get_unless_zero(&npu_context->kref));
>  	}
>  
>  	npu_context->release_cb = cb;
> @@ -811,15 +829,16 @@ static void pnv_npu2_release_context(struct kref *kref)
>  		mm_context_remove_copro(npu_context->mm);

mm_context_remove_copro will now be called while holding a spin lock. Just
as a sanity check, is that ok? I haven't hit any problems in testing
and I see radix__flush_all_mm call preempt_disable/enable so I assume so,
but it doesn't hurt to double-check my understanding.

>  
>  	npu_context->mm->context.npu_context = NULL;
> -	mmu_notifier_unregister(&npu_context->mn,
> -				npu_context->mm);
> -
> -	kfree(npu_context);
>  }
>  
> +/*
> + * Destroy a context on the given GPU. May free the npu_context if it is no
> + * longer active on any GPUs. Must not be called from interrupt context.
> + */
>  void pnv_npu2_destroy_context(struct npu_context *npu_context,
>  			struct pci_dev *gpdev)
>  {
> +	int removed;
>  	struct pnv_phb *nphb;
>  	struct npu *npu;
>  	struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
> @@ -841,7 +860,21 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
>  	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
>  	opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
>  				PCI_DEVID(gpdev->bus->number, gpdev->devfn));
> -	kref_put(&npu_context->kref, pnv_npu2_release_context);
> +	spin_lock(&npu_context_lock);
> +	removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
> +	spin_unlock(&npu_context_lock);
> +
> +	/*
> +	 * We need to do this outside of pnv_npu2_release_context so that it is
> +	 * outside the spinlock as mmu_notifier_destroy uses SRCU.
> +	 */
> +	if (removed) {
> +		mmu_notifier_unregister(&npu_context->mn,
> +					npu_context->mm);
> +
> +		kfree(npu_context);
> +	}
> +
>  }
>  EXPORT_SYMBOL(pnv_npu2_destroy_context);
>  
> -- 
> 2.11.0
> 
> 

Reviewed-by: Mark Hairgrove <mhairgrove@nvidia.com>
Tested-by: Mark Hairgrove <mhairgrove@nvidia.com>


* Re: [PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters
  2018-04-11  6:38 ` [PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters Alistair Popple
@ 2018-04-13  2:02   ` Mark Hairgrove
  2018-04-13 11:21     ` Balbir Singh
  2018-04-20  3:51   ` Alistair Popple
  1 sibling, 1 reply; 9+ messages in thread
From: Mark Hairgrove @ 2018-04-13  2:02 UTC (permalink / raw)
  To: Alistair Popple; +Cc: linuxppc-dev, mpe, arbab, bsingharora



On Wed, 11 Apr 2018, Alistair Popple wrote:

> [...]

Reviewed-by: Mark Hairgrove <mhairgrove@nvidia.com>
Tested-by: Mark Hairgrove <mhairgrove@nvidia.com>


* Re: [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy
  2018-04-11  6:38 [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy Alistair Popple
  2018-04-11  6:38 ` [PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters Alistair Popple
  2018-04-13  2:02 ` [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy Mark Hairgrove
@ 2018-04-13 11:18 ` Balbir Singh
  2018-04-20  3:50 ` Alistair Popple
  2018-04-24  3:48 ` [1/2] " Michael Ellerman
  4 siblings, 0 replies; 9+ messages in thread
From: Balbir Singh @ 2018-04-13 11:18 UTC (permalink / raw)
  To: Alistair Popple
  Cc: open list:LINUX FOR POWERPC (32-BIT AND 64-BIT),
	Michael Ellerman, Mark Hairgrove, arbab

On Wed, Apr 11, 2018 at 4:38 PM, Alistair Popple <alistair@popple.id.au> wrote:
> [...]

Reviewed-by: Balbir Singh <bsingharora@gmail.com>


* Re: [PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters
  2018-04-13  2:02   ` Mark Hairgrove
@ 2018-04-13 11:21     ` Balbir Singh
  0 siblings, 0 replies; 9+ messages in thread
From: Balbir Singh @ 2018-04-13 11:21 UTC (permalink / raw)
  To: Mark Hairgrove
  Cc: Alistair Popple, open list:LINUX FOR POWERPC (32-BIT AND 64-BIT),
	Michael Ellerman, arbab

On Fri, Apr 13, 2018 at 12:02 PM, Mark Hairgrove <mhairgrove@nvidia.com> wrote:
> [...]

Reviewed-by: Balbir Singh <bsingharora@gmail.com>


* Re: [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy
  2018-04-11  6:38 [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy Alistair Popple
                   ` (2 preceding siblings ...)
  2018-04-13 11:18 ` Balbir Singh
@ 2018-04-20  3:50 ` Alistair Popple
  2018-04-24  3:48 ` [1/2] " Michael Ellerman
  4 siblings, 0 replies; 9+ messages in thread
From: Alistair Popple @ 2018-04-20  3:50 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, mhairgrove, arbab

Sorry, forgot to include:

Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2")

Thanks

On Wednesday, 11 April 2018 4:38:54 PM AEST Alistair Popple wrote:
> [...]


* Re: [PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters
  2018-04-11  6:38 ` [PATCH 2/2] powernv/npu: Prevent overwriting of pnv_npu2_init_context() callback parameters Alistair Popple
  2018-04-13  2:02   ` Mark Hairgrove
@ 2018-04-20  3:51   ` Alistair Popple
  1 sibling, 0 replies; 9+ messages in thread
From: Alistair Popple @ 2018-04-20  3:51 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: mpe, mhairgrove, arbab

Sorry, forgot to include:

Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2")

Thanks

On Wednesday, 11 April 2018 4:38:55 PM AEST Alistair Popple wrote:
> [...]


* Re: [1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy
  2018-04-11  6:38 [PATCH 1/2] powernv/npu: Add lock to prevent race in concurrent context init/destroy Alistair Popple
                   ` (3 preceding siblings ...)
  2018-04-20  3:50 ` Alistair Popple
@ 2018-04-24  3:48 ` Michael Ellerman
  4 siblings, 0 replies; 9+ messages in thread
From: Michael Ellerman @ 2018-04-24  3:48 UTC (permalink / raw)
  To: Alistair Popple, linuxppc-dev; +Cc: Alistair Popple, mhairgrove, arbab

On Wed, 2018-04-11 at 06:38:54 UTC, Alistair Popple wrote:
> The pnv_npu2_init_context() and pnv_npu2_destroy_context() functions are
> used to allocate/free contexts to allow address translation and shootdown
> by the NPU on a particular GPU. Context initialisation is implicitly safe
> as it is protected by the requirement mmap_sem be held in write mode,
> however pnv_npu2_destroy_context() does not require mmap_sem to be held and
> it is not safe to call with a concurrent initialisation for a different
> GPU.
> 
> It was assumed the driver would ensure destruction was not called
> concurrently with initialisation. However the driver may be simplified by
> allowing concurrent initialisation and destruction for different GPUs. As
> npu context creation/destruction is not a performance critical path and the
> critical section is not large a single spinlock is used for simplicity.
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> Reviewed-by: Mark Hairgrove <mhairgrove@nvidia.com>
> Tested-by: Mark Hairgrove <mhairgrove@nvidia.com>
> Reviewed-by: Balbir Singh <bsingharora@gmail.com>

Series applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/28a5933e8d362766462ea9e5f135e1

cheers
