linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] cxl: fix nested locking hang during EEH hotplug
@ 2017-02-06  1:07 Andrew Donnellan
  2017-02-07 16:23 ` Frederic Barrat
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Andrew Donnellan @ 2017-02-06  1:07 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: ukrishn, pradghos, fbarrat, vaibhav, imunsie, stable

Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU
not configured") introduced a rwsem to fix an invalid memory access that
occurred when someone attempts to access the config space of an AFU on a
vPHB whilst the AFU is deconfigured, such as during EEH recovery.

It turns out that it's possible to run into a nested locking issue when EEH
recovery fails and a full device hotplug is required.
cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on
configured_rwsem. When EEH recovery fails, the EEH code calls
pci_hp_remove_devices() to remove the device, which in turn calls
cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries
to grab the writer lock that's already held.

Standard rwsem semantics don't express what we really want to do here and
don't allow for nested locking. Fix this by replacing the rwsem with an
atomic_t which we can control more finely. Allow the AFU to be locked
multiple times so long as there are no readers.

Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

---

I've asked Uma and Pradipta to give this a test.

---
 drivers/misc/cxl/cxl.h  |  5 +++--
 drivers/misc/cxl/main.c |  3 +--
 drivers/misc/cxl/pci.c  | 11 +++++++++--
 drivers/misc/cxl/vphb.c | 18 ++++++++++++++----
 4 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b4a43fd14b99..08e7d3a54425 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -418,8 +418,9 @@ struct cxl_afu {
 	struct dentry *debugfs;
 	struct mutex contexts_lock;
 	spinlock_t afu_cntl_lock;
-	/* Used to block access to AFU config space while deconfigured */
-	struct rw_semaphore configured_rwsem;
+
+	/* -1: AFU deconfigured/locked, >= 0: number of readers */
+	atomic_t configured_state;
 
 	/* AFU error buffer fields and bin attribute for sysfs */
 	u64 eb_len, eb_offset;
diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
index 2a6bf1d0a3a4..cc1706a92ace 100644
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -268,8 +268,7 @@ struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
 	idr_init(&afu->contexts_idr);
 	mutex_init(&afu->contexts_lock);
 	spin_lock_init(&afu->afu_cntl_lock);
-	init_rwsem(&afu->configured_rwsem);
-	down_write(&afu->configured_rwsem);
+	atomic_set(&afu->configured_state, -1);
 	afu->prefault_mode = CXL_PREFAULT_NONE;
 	afu->irqs_max = afu->adapter->user_irqs;
 
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index f512e13ec0f2..bdfa5ff11aea 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1129,7 +1129,7 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
 	if ((rc = cxl_native_register_psl_irq(afu)))
 		goto err2;
 
-	up_write(&afu->configured_rwsem);
+	atomic_set(&afu->configured_state, 0);
 	return 0;
 
 err2:
@@ -1142,7 +1142,14 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
 
 static void pci_deconfigure_afu(struct cxl_afu *afu)
 {
-	down_write(&afu->configured_rwsem);
+	/*
+	 * It's okay to deconfigure when AFU is already locked, otherwise wait
+	 * until there are no readers
+	 */
+	if (atomic_read(&afu->configured_state) != -1) {
+		while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1)
+			schedule();
+	}
 	cxl_native_release_psl_irq(afu);
 	if (afu->adapter->native->sl_ops->release_serr_irq)
 		afu->adapter->native->sl_ops->release_serr_irq(afu);
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index 639a343b7836..512a4897dbf6 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -83,6 +83,16 @@ static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus)
 	return phb ? phb->private_data : NULL;
 }
 
+static void cxl_afu_configured_put(struct cxl_afu *afu)
+{
+	atomic_dec_if_positive(&afu->configured_state);
+}
+
+static bool cxl_afu_configured_get(struct cxl_afu *afu)
+{
+	return atomic_inc_unless_negative(&afu->configured_state);
+}
+
 static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
 				       struct cxl_afu *afu, int *_record)
 {
@@ -107,7 +117,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
 
 	afu = pci_bus_to_afu(bus);
 	/* Grab a reader lock on afu. */
-	if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
+	if (afu == NULL || !cxl_afu_configured_get(afu))
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
@@ -132,7 +142,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
 	}
 
 out:
-	up_read(&afu->configured_rwsem);
+	cxl_afu_configured_put(afu);
 	return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
 }
 
@@ -144,7 +154,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
 
 	afu = pci_bus_to_afu(bus);
 	/* Grab a reader lock on afu. */
-	if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
+	if (afu == NULL || !cxl_afu_configured_get(afu))
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
@@ -166,7 +176,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
 	}
 
 out:
-	up_read(&afu->configured_rwsem);
+	cxl_afu_configured_put(afu);
 	return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
 }
 
-- 
Andrew Donnellan              OzLabs, ADL Canberra
andrew.donnellan@au1.ibm.com  IBM Australia Limited

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] cxl: fix nested locking hang during EEH hotplug
  2017-02-06  1:07 [PATCH] cxl: fix nested locking hang during EEH hotplug Andrew Donnellan
@ 2017-02-07 16:23 ` Frederic Barrat
  2017-02-21  7:52 ` Andrew Donnellan
  2017-02-27 10:11 ` Michael Ellerman
  2 siblings, 0 replies; 4+ messages in thread
From: Frederic Barrat @ 2017-02-07 16:23 UTC (permalink / raw)
  To: Andrew Donnellan, linuxppc-dev
  Cc: vaibhav, stable, ukrishn, pradghos, imunsie



Le 06/02/2017 à 02:07, Andrew Donnellan a écrit :
> Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU
> not configured") introduced a rwsem to fix an invalid memory access that
> occurred when someone attempts to access the config space of an AFU on a
> vPHB whilst the AFU is deconfigured, such as during EEH recovery.
>
> It turns out that it's possible to run into a nested locking issue when EEH
> recovery fails and a full device hotplug is required.
> cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on
> configured_rwsem. When EEH recovery fails, the EEH code calls
> pci_hp_remove_devices() to remove the device, which in turn calls
> cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries
> to grab the writer lock that's already held.
>
> Standard rwsem semantics don't express what we really want to do here and
> don't allow for nested locking. Fix this by replacing the rwsem with an
> atomic_t which we can control more finely. Allow the AFU to be locked
> multiple times so long as there are no readers.
>
> Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured")
> Cc: stable@vger.kernel.org # v4.9+
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
>
> ---
>
> I've asked Uma and Pradipta to give this a test.
>
> ---

It looks ok to me. Once tested by cxlflash:
Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>

   Fred




>  drivers/misc/cxl/cxl.h  |  5 +++--
>  drivers/misc/cxl/main.c |  3 +--
>  drivers/misc/cxl/pci.c  | 11 +++++++++--
>  drivers/misc/cxl/vphb.c | 18 ++++++++++++++----
>  4 files changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
> index b4a43fd14b99..08e7d3a54425 100644
> --- a/drivers/misc/cxl/cxl.h
> +++ b/drivers/misc/cxl/cxl.h
> @@ -418,8 +418,9 @@ struct cxl_afu {
>  	struct dentry *debugfs;
>  	struct mutex contexts_lock;
>  	spinlock_t afu_cntl_lock;
> -	/* Used to block access to AFU config space while deconfigured */
> -	struct rw_semaphore configured_rwsem;
> +
> +	/* -1: AFU deconfigured/locked, >= 0: number of readers */
> +	atomic_t configured_state;
>
>  	/* AFU error buffer fields and bin attribute for sysfs */
>  	u64 eb_len, eb_offset;
> diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
> index 2a6bf1d0a3a4..cc1706a92ace 100644
> --- a/drivers/misc/cxl/main.c
> +++ b/drivers/misc/cxl/main.c
> @@ -268,8 +268,7 @@ struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
>  	idr_init(&afu->contexts_idr);
>  	mutex_init(&afu->contexts_lock);
>  	spin_lock_init(&afu->afu_cntl_lock);
> -	init_rwsem(&afu->configured_rwsem);
> -	down_write(&afu->configured_rwsem);
> +	atomic_set(&afu->configured_state, -1);
>  	afu->prefault_mode = CXL_PREFAULT_NONE;
>  	afu->irqs_max = afu->adapter->user_irqs;
>
> diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
> index f512e13ec0f2..bdfa5ff11aea 100644
> --- a/drivers/misc/cxl/pci.c
> +++ b/drivers/misc/cxl/pci.c
> @@ -1129,7 +1129,7 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
>  	if ((rc = cxl_native_register_psl_irq(afu)))
>  		goto err2;
>
> -	up_write(&afu->configured_rwsem);
> +	atomic_set(&afu->configured_state, 0);
>  	return 0;
>
>  err2:
> @@ -1142,7 +1142,14 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
>
>  static void pci_deconfigure_afu(struct cxl_afu *afu)
>  {
> -	down_write(&afu->configured_rwsem);
> +	/*
> +	 * It's okay to deconfigure when AFU is already locked, otherwise wait
> +	 * until there are no readers
> +	 */
> +	if (atomic_read(&afu->configured_state) != -1) {
> +		while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1)
> +			schedule();
> +	}
>  	cxl_native_release_psl_irq(afu);
>  	if (afu->adapter->native->sl_ops->release_serr_irq)
>  		afu->adapter->native->sl_ops->release_serr_irq(afu);
> diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
> index 639a343b7836..512a4897dbf6 100644
> --- a/drivers/misc/cxl/vphb.c
> +++ b/drivers/misc/cxl/vphb.c
> @@ -83,6 +83,16 @@ static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus)
>  	return phb ? phb->private_data : NULL;
>  }
>
> +static void cxl_afu_configured_put(struct cxl_afu *afu)
> +{
> +	atomic_dec_if_positive(&afu->configured_state);
> +}
> +
> +static bool cxl_afu_configured_get(struct cxl_afu *afu)
> +{
> +	return atomic_inc_unless_negative(&afu->configured_state);
> +}
> +
>  static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
>  				       struct cxl_afu *afu, int *_record)
>  {
> @@ -107,7 +117,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
>
>  	afu = pci_bus_to_afu(bus);
>  	/* Grab a reader lock on afu. */
> -	if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
> +	if (afu == NULL || !cxl_afu_configured_get(afu))
>  		return PCIBIOS_DEVICE_NOT_FOUND;
>
>  	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
> @@ -132,7 +142,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
>  	}
>
>  out:
> -	up_read(&afu->configured_rwsem);
> +	cxl_afu_configured_put(afu);
>  	return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
>  }
>
> @@ -144,7 +154,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
>
>  	afu = pci_bus_to_afu(bus);
>  	/* Grab a reader lock on afu. */
> -	if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
> +	if (afu == NULL || !cxl_afu_configured_get(afu))
>  		return PCIBIOS_DEVICE_NOT_FOUND;
>
>  	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
> @@ -166,7 +176,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
>  	}
>
>  out:
> -	up_read(&afu->configured_rwsem);
> +	cxl_afu_configured_put(afu);
>  	return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
>  }
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] cxl: fix nested locking hang during EEH hotplug
  2017-02-06  1:07 [PATCH] cxl: fix nested locking hang during EEH hotplug Andrew Donnellan
  2017-02-07 16:23 ` Frederic Barrat
@ 2017-02-21  7:52 ` Andrew Donnellan
  2017-02-27 10:11 ` Michael Ellerman
  2 siblings, 0 replies; 4+ messages in thread
From: Andrew Donnellan @ 2017-02-21  7:52 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: vaibhav, stable, ukrishn, fbarrat, pradghos, imunsie

On 06/02/17 12:07, Andrew Donnellan wrote:
> Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU
> not configured") introduced a rwsem to fix an invalid memory access that
> occurred when someone attempts to access the config space of an AFU on a
> vPHB whilst the AFU is deconfigured, such as during EEH recovery.
>
> It turns out that it's possible to run into a nested locking issue when EEH
> recovery fails and a full device hotplug is required.
> cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on
> configured_rwsem. When EEH recovery fails, the EEH code calls
> pci_hp_remove_devices() to remove the device, which in turn calls
> cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries
> to grab the writer lock that's already held.
>
> Standard rwsem semantics don't express what we really want to do here and
> don't allow for nested locking. Fix this by replacing the rwsem with an
> atomic_t which we can control more finely. Allow the AFU to be locked
> multiple times so long as there are no readers.
>
> Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured")
> Cc: stable@vger.kernel.org # v4.9+
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

This patch has now been tested within IBM and should be good to go.

Michael/stable team - please make sure this gets in at the same time as 
14a3ae34bfd0.


Andrew


>
> ---
>
> I've asked Uma and Pradipta to give this a test.
>
> ---
>  drivers/misc/cxl/cxl.h  |  5 +++--
>  drivers/misc/cxl/main.c |  3 +--
>  drivers/misc/cxl/pci.c  | 11 +++++++++--
>  drivers/misc/cxl/vphb.c | 18 ++++++++++++++----
>  4 files changed, 27 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
> index b4a43fd14b99..08e7d3a54425 100644
> --- a/drivers/misc/cxl/cxl.h
> +++ b/drivers/misc/cxl/cxl.h
> @@ -418,8 +418,9 @@ struct cxl_afu {
>  	struct dentry *debugfs;
>  	struct mutex contexts_lock;
>  	spinlock_t afu_cntl_lock;
> -	/* Used to block access to AFU config space while deconfigured */
> -	struct rw_semaphore configured_rwsem;
> +
> +	/* -1: AFU deconfigured/locked, >= 0: number of readers */
> +	atomic_t configured_state;
>
>  	/* AFU error buffer fields and bin attribute for sysfs */
>  	u64 eb_len, eb_offset;
> diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c
> index 2a6bf1d0a3a4..cc1706a92ace 100644
> --- a/drivers/misc/cxl/main.c
> +++ b/drivers/misc/cxl/main.c
> @@ -268,8 +268,7 @@ struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
>  	idr_init(&afu->contexts_idr);
>  	mutex_init(&afu->contexts_lock);
>  	spin_lock_init(&afu->afu_cntl_lock);
> -	init_rwsem(&afu->configured_rwsem);
> -	down_write(&afu->configured_rwsem);
> +	atomic_set(&afu->configured_state, -1);
>  	afu->prefault_mode = CXL_PREFAULT_NONE;
>  	afu->irqs_max = afu->adapter->user_irqs;
>
> diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
> index f512e13ec0f2..bdfa5ff11aea 100644
> --- a/drivers/misc/cxl/pci.c
> +++ b/drivers/misc/cxl/pci.c
> @@ -1129,7 +1129,7 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
>  	if ((rc = cxl_native_register_psl_irq(afu)))
>  		goto err2;
>
> -	up_write(&afu->configured_rwsem);
> +	atomic_set(&afu->configured_state, 0);
>  	return 0;
>
>  err2:
> @@ -1142,7 +1142,14 @@ static int pci_configure_afu(struct cxl_afu *afu, struct cxl *adapter, struct pc
>
>  static void pci_deconfigure_afu(struct cxl_afu *afu)
>  {
> -	down_write(&afu->configured_rwsem);
> +	/*
> +	 * It's okay to deconfigure when AFU is already locked, otherwise wait
> +	 * until there are no readers
> +	 */
> +	if (atomic_read(&afu->configured_state) != -1) {
> +		while (atomic_cmpxchg(&afu->configured_state, 0, -1) != -1)
> +			schedule();
> +	}
>  	cxl_native_release_psl_irq(afu);
>  	if (afu->adapter->native->sl_ops->release_serr_irq)
>  		afu->adapter->native->sl_ops->release_serr_irq(afu);
> diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
> index 639a343b7836..512a4897dbf6 100644
> --- a/drivers/misc/cxl/vphb.c
> +++ b/drivers/misc/cxl/vphb.c
> @@ -83,6 +83,16 @@ static inline struct cxl_afu *pci_bus_to_afu(struct pci_bus *bus)
>  	return phb ? phb->private_data : NULL;
>  }
>
> +static void cxl_afu_configured_put(struct cxl_afu *afu)
> +{
> +	atomic_dec_if_positive(&afu->configured_state);
> +}
> +
> +static bool cxl_afu_configured_get(struct cxl_afu *afu)
> +{
> +	return atomic_inc_unless_negative(&afu->configured_state);
> +}
> +
>  static inline int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
>  				       struct cxl_afu *afu, int *_record)
>  {
> @@ -107,7 +117,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
>
>  	afu = pci_bus_to_afu(bus);
>  	/* Grab a reader lock on afu. */
> -	if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
> +	if (afu == NULL || !cxl_afu_configured_get(afu))
>  		return PCIBIOS_DEVICE_NOT_FOUND;
>
>  	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
> @@ -132,7 +142,7 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
>  	}
>
>  out:
> -	up_read(&afu->configured_rwsem);
> +	cxl_afu_configured_put(afu);
>  	return rc ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
>  }
>
> @@ -144,7 +154,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
>
>  	afu = pci_bus_to_afu(bus);
>  	/* Grab a reader lock on afu. */
> -	if (afu == NULL || !down_read_trylock(&afu->configured_rwsem))
> +	if (afu == NULL || !cxl_afu_configured_get(afu))
>  		return PCIBIOS_DEVICE_NOT_FOUND;
>
>  	rc = cxl_pcie_config_info(bus, devfn, afu, &record);
> @@ -166,7 +176,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
>  	}
>
>  out:
> -	up_read(&afu->configured_rwsem);
> +	cxl_afu_configured_put(afu);
>  	return rc ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL;
>  }
>
>

-- 
Andrew Donnellan              OzLabs, ADL Canberra
andrew.donnellan@au1.ibm.com  IBM Australia Limited

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: cxl: fix nested locking hang during EEH hotplug
  2017-02-06  1:07 [PATCH] cxl: fix nested locking hang during EEH hotplug Andrew Donnellan
  2017-02-07 16:23 ` Frederic Barrat
  2017-02-21  7:52 ` Andrew Donnellan
@ 2017-02-27 10:11 ` Michael Ellerman
  2 siblings, 0 replies; 4+ messages in thread
From: Michael Ellerman @ 2017-02-27 10:11 UTC (permalink / raw)
  To: Andrew Donnellan, linuxppc-dev
  Cc: vaibhav, stable, ukrishn, fbarrat, pradghos, imunsie

On Mon, 2017-02-06 at 01:07:17 UTC, Andrew Donnellan wrote:
> Commit 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU
> not configured") introduced a rwsem to fix an invalid memory access that
> occurred when someone attempts to access the config space of an AFU on a
> vPHB whilst the AFU is deconfigured, such as during EEH recovery.
> 
> It turns out that it's possible to run into a nested locking issue when EEH
> recovery fails and a full device hotplug is required.
> cxl_pci_error_detected() deconfigures the AFU, taking a writer lock on
> configured_rwsem. When EEH recovery fails, the EEH code calls
> pci_hp_remove_devices() to remove the device, which in turn calls
> cxl_remove() -> cxl_pci_remove_afu() -> pci_deconfigure_afu(), which tries
> to grab the writer lock that's already held.
> 
> Standard rwsem semantics don't express what we really want to do here and
> don't allow for nested locking. Fix this by replacing the rwsem with an
> atomic_t which we can control more finely. Allow the AFU to be locked
> multiple times so long as there are no readers.
> 
> Fixes: 14a3ae34bfd0 ("cxl: Prevent read/write to AFU config space while AFU not configured")
> Cc: stable@vger.kernel.org # v4.9+
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/171ed0fcd8966d82c45376f1434678

cheers

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-02-27 10:11 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-06  1:07 [PATCH] cxl: fix nested locking hang during EEH hotplug Andrew Donnellan
2017-02-07 16:23 ` Frederic Barrat
2017-02-21  7:52 ` Andrew Donnellan
2017-02-27 10:11 ` Michael Ellerman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).