* [PATCH] drm/amdgpu: capture invalid hardware access v2
@ 2021-03-08 18:06 Christian König
  2021-03-09  2:03 ` Li, Dennis
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2021-03-08 18:06 UTC (permalink / raw)
  To: amd-gfx; +Cc: andrey.grodzovsky, Dennis.Li

From: Dennis Li <Dennis.Li@amd.com>

When the recovery thread has begun a GPU reset, no other thread should
access the hardware, otherwise the system randomly hangs.

v2 (chk): rewritten from scratch, use trylock and lockdep instead of
	  hand wiring the logic.

Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 74 +++++++++++++++++-----
 1 file changed, 57 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e247c3a2ec08..c990af6a43ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -326,6 +326,34 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
 /*
  * register access helper functions.
  */
+
+/* Check if hw access should be skipped because of hotplug or device error */
+static bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
+{
+	if (adev->in_pci_err_recovery)
+		return true;
+
+#ifdef CONFIG_LOCKDEP
+	/*
+	 * This is a bit complicated to understand, so worth a comment. What we assert
+	 * here is that the GPU reset is not running on another thread in parallel.
+	 *
+	 * For this we trylock the read side of the reset semaphore; if that succeeds,
+	 * we know that the reset is not running in parallel.
+	 *
+	 * If the trylock fails we assert that we are either already holding the read
+	 * side of the lock or are the reset thread itself and hold the write side of
+	 * the lock.
+	 */
+	if (down_read_trylock(&adev->reset_sem))
+		up_read(&adev->reset_sem);
+	else
+		lockdep_assert_held(&adev->reset_sem);
+#endif
+
+	return false;
+}
+
 /**
  * amdgpu_device_rreg - read a memory mapped IO or indirect register
  *
@@ -340,7 +368,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
 {
 	uint32_t ret;
 
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return 0;
 
 	if ((reg * 4) < adev->rmmio_size) {
@@ -377,7 +405,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
  */
 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return 0;
 
 	if (offset < adev->rmmio_size)
@@ -402,7 +430,7 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
  */
 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if (offset < adev->rmmio_size)
@@ -425,7 +453,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
 			uint32_t reg, uint32_t v,
 			uint32_t acc_flags)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if ((reg * 4) < adev->rmmio_size) {
@@ -452,7 +480,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
 			     uint32_t reg, uint32_t v)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if (amdgpu_sriov_fullaccess(adev) &&
@@ -475,7 +503,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
  */
 u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return 0;
 
 	if ((reg * 4) < adev->rio_mem_size)
@@ -497,7 +525,7 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
  */
 void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if ((reg * 4) < adev->rio_mem_size)
@@ -519,7 +547,7 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
  */
 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return 0;
 
 	if (index < adev->doorbell.num_doorbells) {
@@ -542,7 +570,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
  */
 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if (index < adev->doorbell.num_doorbells) {
@@ -563,7 +591,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
  */
 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return 0;
 
 	if (index < adev->doorbell.num_doorbells) {
@@ -586,7 +614,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
  */
 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
 {
-	if (adev->in_pci_err_recovery)
+	if (amdgpu_device_skip_hw_access(adev))
 		return;
 
 	if (index < adev->doorbell.num_doorbells) {
@@ -610,10 +638,13 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
 				u32 pcie_index, u32 pcie_data,
 				u32 reg_addr)
 {
-	unsigned long flags;
-	u32 r;
 	void __iomem *pcie_index_offset;
 	void __iomem *pcie_data_offset;
+	unsigned long flags;
+	u32 r;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return 0;
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -641,10 +672,13 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
 				  u32 pcie_index, u32 pcie_data,
 				  u32 reg_addr)
 {
-	unsigned long flags;
-	u64 r;
 	void __iomem *pcie_index_offset;
 	void __iomem *pcie_data_offset;
+	unsigned long flags;
+	u64 r;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return 0;
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -677,9 +711,12 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
 				 u32 pcie_index, u32 pcie_data,
 				 u32 reg_addr, u32 reg_data)
 {
-	unsigned long flags;
 	void __iomem *pcie_index_offset;
 	void __iomem *pcie_data_offset;
+	unsigned long flags;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return;
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
@@ -706,9 +743,12 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
 				   u32 pcie_index, u32 pcie_data,
 				   u32 reg_addr, u64 reg_data)
 {
-	unsigned long flags;
 	void __iomem *pcie_index_offset;
 	void __iomem *pcie_data_offset;
+	unsigned long flags;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return;
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* RE: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-08 18:06 [PATCH] drm/amdgpu: capture invalid hardware access v2 Christian König
@ 2021-03-09  2:03 ` Li, Dennis
  2021-03-09 12:56   ` Christian König
  0 siblings, 1 reply; 16+ messages in thread
From: Li, Dennis @ 2021-03-09  2:03 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: Grodzovsky, Andrey

[AMD Official Use Only - Internal Distribution Only]

Hi, Christian,
       amdgpu_device_skip_hw_access will always assert in the reset thread, which does not seem like a good idea.

Best Regards
Dennis Li
-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Tuesday, March 9, 2021 2:07 AM
To: amd-gfx@lists.freedesktop.org
Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Li, Dennis <Dennis.Li@amd.com>
Subject: [PATCH] drm/amdgpu: capture invalid hardware access v2

From: Dennis Li <Dennis.Li@amd.com>

When the recovery thread has begun a GPU reset, no other thread should access the hardware, otherwise the system randomly hangs.

v2 (chk): rewritten from scratch, use trylock and lockdep instead of
  hand wiring the logic.

Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
[snip]

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-09  2:03 ` Li, Dennis
@ 2021-03-09 12:56   ` Christian König
  2021-03-09 16:40     ` Andrey Grodzovsky
  2021-03-10  3:12     ` Li, Dennis
  0 siblings, 2 replies; 16+ messages in thread
From: Christian König @ 2021-03-09 12:56 UTC (permalink / raw)
  To: Li, Dennis, amd-gfx; +Cc: Grodzovsky, Andrey

Hi Dennis,

why do you think that this will always assert in the reset thread?

In the reset thread, while we are holding the write side of the reset
lock, lockdep_assert_held() should be satisfied and not cause any splat
in the system log.
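
To make that concrete, here is a minimal sketch (illustrative only, not
actual driver code) of the three cases the check distinguishes:

/*
 * Case 1: no reset running. down_read_trylock() succeeds and the lock
 * is dropped again right away - silent.
 *
 * Case 2: the reset thread itself. It holds the write side, so
 * down_read_trylock() fails, but lockdep records the write holder as
 * holding the lock, so lockdep_assert_held() is satisfied - silent.
 *
 * Case 3: any other thread while a reset runs. The trylock fails and
 * lockdep_assert_held() splats, capturing the invalid access.
 */
static void example_reset_thread(struct amdgpu_device *adev)
{
        down_write(&adev->reset_sem);   /* reset in progress */

        /* A register access from here, e.g. amdgpu_device_rreg(),
         * hits case 2 above and therefore does not splat. */

        up_write(&adev->reset_sem);
}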

Regards,
Christian.

Am 09.03.21 um 03:03 schrieb Li, Dennis:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi, Christian,
>         amdgpu_device_skip_hw_access will always assert in the reset thread, which does not seem like a good idea.
>
> Best Regards
> Dennis Li
> [snip]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-09 12:56   ` Christian König
@ 2021-03-09 16:40     ` Andrey Grodzovsky
  2021-03-09 17:47       ` Christian König
  2021-03-10  3:12     ` Li, Dennis
  1 sibling, 1 reply; 16+ messages in thread
From: Andrey Grodzovsky @ 2021-03-09 16:40 UTC (permalink / raw)
  To: Christian König, Li, Dennis, amd-gfx

Because he takes the write side lock only after amdgpu_pre_asic_reset -
where the HW suspend sequence happens (touching registers) - I think it
will assert.
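
For reference, a rough sketch of the ordering in question (the helper
name example_pre_asic_reset is made up, this is not the actual reset
code):

static int example_gpu_recover(struct amdgpu_device *adev)
{
        if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
                return 0;               /* step 0: only one TDR thread */

        /* step 1: cancel delayed work, stop the schedulers and suspend
         * the HW - this already touches registers. */
        example_pre_asic_reset(adev);

        down_write(&adev->reset_sem);   /* step 2: write side only here */
        /* ... actual ASIC reset ... */
        up_write(&adev->reset_sem);

        atomic_set(&adev->in_gpu_reset, 0);
        return 0;
}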

Andrey

On 2021-03-09 7:56 a.m., Christian König wrote:
> Hi Dennis,
> 
> why do you think that this will always assert in the reset thread?
>
> In the reset thread, while we are holding the write side of the reset
> lock, lockdep_assert_held() should be satisfied and not cause any splat
> in the system log.
> 
> Regards,
> Christian.
> [snip]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-09 16:40     ` Andrey Grodzovsky
@ 2021-03-09 17:47       ` Christian König
  2021-03-09 18:26         ` Andrey Grodzovsky
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2021-03-09 17:47 UTC (permalink / raw)
  To: Andrey Grodzovsky, Li, Dennis, amd-gfx

No it won't. Accessing the hardware without the lock is ok as long as 
the write side isn't taken.

But that approach is illegal anyway because we suspend the hardware 
without proper protection from concurrent access.
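
Spelled out as an interleaving (an illustrative sketch, not actual
code):

/*
 *   reset thread                      concurrent thread
 *   ------------                      -----------------
 *   step 1: suspend HW                down_read_trylock() succeeds
 *     (touches registers)               (write side not yet taken)
 *                                     register access - races with
 *                                       the suspend sequence
 *   step 2: down_write(&reset_sem)    (too late to exclude the
 *                                       access above)
 */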

Christian.

Am 09.03.21 um 17:40 schrieb Andrey Grodzovsky:
> Because he takes the write side lock only after amdgpu_pre_asic_reset -
> where the HW suspend sequence happens (touching registers) - I think it
> will assert.
>
> Andrey
>
> [snip]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-09 17:47       ` Christian König
@ 2021-03-09 18:26         ` Andrey Grodzovsky
  2021-03-09 18:51           ` Christian König
  2021-03-10  2:59           ` Li, Dennis
  0 siblings, 2 replies; 16+ messages in thread
From: Andrey Grodzovsky @ 2021-03-09 18:26 UTC (permalink / raw)
  To: Christian König, Li, Dennis, amd-gfx

On 2021-03-09 12:47 p.m., Christian König wrote:
> No it won't. Accessing the hardware without the lock is ok as long as 
> the write side isn't taken.

Oh, forgot about the trylock part, sorry...

> 
> But that approach is illegal anyway because we suspend the hardware 
> without proper protection from concurrent access.

For my understanding, and from looking again at his steps related to this:

Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects from
other TDR threads

Step 1: cancel all delayed works, stop the drm scheduler, complete all
unreceived fences and so on. Call amdgpu_device_pre_asic_reset... etc.
- [AG] this is the HW suspend part

Step 2: call down_write(&adev->reset_sem) to hold the write lock, which
will block the recovery thread until other threads release their read
locks.

Is the problem here that the HW is suspended while some other threads
that rely on the read side lock still access the HW? Mostly what I am
thinking about are IOCTLs - we can't 'wait for them to complete', but
they might be accessing the HW when we start the suspend.
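
For illustration, the window in question as a hypothetical ioctl (the
function name and error handling are made up):

static int example_ioctl(struct amdgpu_device *adev)
{
        if (!down_read_trylock(&adev->reset_sem))
                return -EAGAIN;         /* write side held: step 2 reached */

        /* If we get here between step 1 (HW suspend) and step 2
         * (down_write), the HW may already be suspended under us. */
        /* ... register access ... */

        up_read(&adev->reset_sem);
        return 0;
}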

Andrey


> [snip]

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-09 18:26         ` Andrey Grodzovsky
@ 2021-03-09 18:51           ` Christian König
  2021-03-09 19:13             ` Andrey Grodzovsky
  2021-03-10  2:59           ` Li, Dennis
  1 sibling, 1 reply; 16+ messages in thread
From: Christian König @ 2021-03-09 18:51 UTC (permalink / raw)
  To: Andrey Grodzovsky, Li, Dennis, amd-gfx, Daniel Vetter

Am 09.03.21 um 19:26 schrieb Andrey Grodzovsky:
> On 2021-03-09 12:47 p.m., Christian König wrote:
>> No it won't. Accessing the hardware without the lock is ok as long as 
>> the write side isn't taken.
>
> Oh, forgot about the trylock part, sorry...
>
>>
>> But that approach is illegal anyway because we suspend the hardware 
>> without proper protection from concurrent access.
>
> For my understanding, and from looking again at his steps related to this:
>
> Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects from 
> other TDR threads
>
> Step 1: cancel all delayed works, stop the drm scheduler, complete all
> unreceived fences and so on. Call amdgpu_device_pre_asic_reset... etc.
> - [AG] this is the HW suspend part
>
> Step 2: call down_write(&adev->reset_sem) to hold the write lock, which
> will block the recovery thread until other threads release their read
> locks.
>
> Is the problem here that the HW is suspended while some other threads
> that rely on the read side lock still access the HW?

Exactly that, yes.

> Mostly what I am
> thinking about are IOCTLs - we can't 'wait for them to complete', but
> they might be accessing the HW when we start the suspend.

Well, the bigger problem with the IOCTLs is that I don't seem to be able
to properly explain the dependency here.

Maybe let me describe it from the other side once more to make it more 
understandable:

Core Linux memory management depends on waiting for dma_fences. Amdgpu's
dma_fences in turn depend on the GPU scheduler and the recovery/reset
functionality to work properly.

So every call to kmalloc() or get_free_page() depends on the amdgpu GPU
recover/reset function, and with it on every lock taken in that function.

Because of this you can't take any lock under which memory allocation 
might happen while holding the reset lock.

This is a hard and by now very well documented limitation, and Dennis'
changes here don't magically remove it.

Because of this, taking the reset lock - or any other lock used in the
GPU reset functionality - at the beginning of our IOCTLs will never work
correctly.

What you need to do is to have all locks which depend on MM taken before
you take the reset lock, e.g. dma_resv, mmap_sem, VM locks etc., and to
not call functions like kmalloc(), get_free_page() or dma_fence_wait()
under it, as sketched below.
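
As a sketch of that ordering (a hypothetical, simplified ioctl):

static int example_ioctl(struct amdgpu_device *adev)
{
        void *buf;

        /* Everything that can allocate memory or depends on MM locks
         * (dma_resv, mmap_sem, VM locks, ...) happens first. */
        buf = kmalloc(64, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        down_read(&adev->reset_sem);
        /* Under the reset lock: pure HW access only - no kmalloc(),
         * no get_free_page(), no dma_fence_wait(). */
        up_read(&adev->reset_sem);

        kfree(buf);
        return 0;
}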

I hope that somehow makes it clearer now. We should probably try to
merge Daniel's patch set for teaching lockdep to complain about such
things sooner or later.

With those patches merged, lockdep starts to complain about these
problems as soon as you run a kernel with them enabled.
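
The kind of annotation meant here looks roughly like the dma_fence
signalling annotations (a sketch; see dma_fence_begin_signalling() in
mainline for the real interface):

        bool cookie = dma_fence_begin_signalling();

        /* Code on the fence signalling path, e.g. the GPU reset work.
         * Lockdep now complains if this path allocates memory or takes
         * any lock that memory reclaim can depend on. */

        dma_fence_end_signalling(cookie);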

Regards,
Christian.

>
> Andrey
>
>
>>
>> Christian.
>>
>> Am 09.03.21 um 17:40 schrieb Andrey Grodzovsky:
>>> Because he takes the write side lock post amdgpu_pre_asic_reset - 
>>> where HW suspend sequence happens (touching registers) - so i think 
>>> it will assert.
>>>
>>> Andrey
>>>
>>> On 2021-03-09 7:56 a.m., Christian König wrote:
>>>> Hi Dennis,
>>>>
>>>> why do you think that this will always assert in reset thread?
>>>>
>>>> In the reset thread while we are holding the reset lock write side 
>>>> lockdep_assert_held() should be satisfied and not cause any splat 
>>>> in the system log.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>> Am 09.03.21 um 03:03 schrieb Li, Dennis:
>>>>> [AMD Official Use Only - Internal Distribution Only]
>>>>>
>>>>> Hi, Christian,
>>>>>         amdgpu_device_skip_hw_access will always assert in reset 
>>>>> thread, which seems not a good idea.
>>>>>
>>>>> Best Regards
>>>>> Dennis Li

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-09 18:51           ` Christian König
@ 2021-03-09 19:13             ` Andrey Grodzovsky
  2021-03-09 21:13               ` Daniel Vetter
  0 siblings, 1 reply; 16+ messages in thread
From: Andrey Grodzovsky @ 2021-03-09 19:13 UTC (permalink / raw)
  To: Christian König, Li, Dennis, amd-gfx, Daniel Vetter

On 2021-03-09 1:51 p.m., Christian König wrote:
> Am 09.03.21 um 19:26 schrieb Andrey Grodzovsky:
>> On 2021-03-09 12:47 p.m., Christian König wrote:
>>> No it won't. Accessing the hardware without the lock is ok as long as 
>>> the write side isn't taken.
>>
>> Oh, forgot about the trylock part, sorry...
>>
>>>
>>> But that approach is illegal anyway because we suspend the hardware 
>>> without proper protection from concurrent access.
>>
>> For my understanding and from looking again at his steps related to this
>>
>> Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects from 
>> other TDR threads
>>
>> Step 1: cancel all delayed works, stop the drm scheduler, complete all 
>> unreceived fences and so on. Call amdgpu_device_pre_asic_reset()... etc.
>> - [AG] this is the HW suspend part
>>
>> Step 2: call down_write(&adev->reset_sem) to take the write lock, which
>> will block the recovery thread until other threads release their read
>> locks.
>>
>> Is the problem here that HW is suspended while some other threads
>> that rely on the read side lock still access HW?
> 
> Exactly that, yes.
> 
>> Mostly what I am
>> thinking about are IOCTLs - we can't 'wait for them to complete' but
>> they might be accessing HW when we start suspend.
> 
> Well, the bigger problem with the IOCTLs is that I can't seem to 
> properly explain the dependency here.
> 
> Maybe let me describe it from the other side once more to make it more 
> understandable:
> 
> Core Linux memory management depends on waiting for dma_fences. Amdgpu's 
> dma_fences in turn depend on the GPU scheduler and the recovery/reset 
> functionality to work properly.
> 
> So every call to kmalloc() or get_free_page() depends on the amdgpu GPU 
> recover/reset function, and with it on every lock taken in that function.
> 
> Because of this you can't take any lock under which memory allocation 
> might happen while holding the reset lock.
> 
> This is a hard and by now very well documented limitation, and Dennis' 
> changes here don't magically remove it.
> 
> Because of this, taking the reset lock or any other lock used in the GPU 
> reset functionality at the beginning of our IOCTLs will never work 
> correctly.
> 
> What you need to do is take all locks which depend on the MM before you 
> take the reset lock, e.g. dma_resv, mmap_sem, the VM locks etc., and not 
> call functions like kmalloc(), get_free_page() or dma_fence_wait() while 
> holding the reset lock.
> 
> I hope that somehow makes it more clear now. We should probably try to 
> merge Daniel's patch set for teaching lockdep to complain about such 
> things sooner or later.

It is now more clear than in the previous discussion. One
thing that would also be helpful is to see the kernel documentation
about this limitation - can you maybe point to such documentation in
the kernel docs or even in the code?

Andrey


> 
> With those patches merged, lockdep starts to complain about the problems 
> as soon as you try to run a kernel with them enabled.
> 
> Regards,
> Christian.
> 
>>
>> Andrey
>>
>>
>>>
>>> Christian.
>>>
>>> Am 09.03.21 um 17:40 schrieb Andrey Grodzovsky:
>>>> Because he takes the write side lock post amdgpu_pre_asic_reset - 
>>>> where the HW suspend sequence happens (touching registers) - so I 
>>>> think it will assert.
>>>>
>>>> Andrey
>>>>
>>>> On 2021-03-09 7:56 a.m., Christian König wrote:
>>>>> Hi Dennis,
>>>>>
>>>>> why do you think that this will always assert in the reset thread?
>>>>>
>>>>> In the reset thread, while we are holding the write side of the 
>>>>> reset lock, lockdep_assert_held() should be satisfied and not cause 
>>>>> any splat in the system log.
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>> Am 09.03.21 um 03:03 schrieb Li, Dennis:
>>>>>> [AMD Official Use Only - Internal Distribution Only]
>>>>>>
>>>>>> Hi, Christian,
>>>>>>         amdgpu_device_skip_hw_access will always assert in reset 
>>>>>> thread, which seems not a good idea.
>>>>>>
>>>>>> Best Regards
>>>>>> Dennis Li
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-09 19:13             ` Andrey Grodzovsky
@ 2021-03-09 21:13               ` Daniel Vetter
  0 siblings, 0 replies; 16+ messages in thread
From: Daniel Vetter @ 2021-03-09 21:13 UTC (permalink / raw)
  To: Andrey Grodzovsky; +Cc: Christian König, amd-gfx, Li, Dennis

On Tue, Mar 9, 2021 at 8:13 PM Andrey Grodzovsky
<andrey.grodzovsky@amd.com> wrote:
>
> On 2021-03-09 1:51 p.m., Christian König wrote:
> > Am 09.03.21 um 19:26 schrieb Andrey Grodzovsky:
> >> On 2021-03-09 12:47 p.m., Christian König wrote:
> >>> No it won't. Accessing the hardware without the lock is ok as long as
> >>> the write side isn't taken.
> >>
> >> Oh, forgot about the trylock part, sorry...
> >>
> >>>
> >>> But that approach is illegal anyway because we suspend the hardware
> >>> without proper protection from concurrent access.
> >>
> >> For my understanding and from looking again at his steps related to this
> >>
> >> Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects from
> >> other TDR threads
> >>
> >> Step 1: cancel all delayed works, stop the drm scheduler, complete all
> >> unreceived fences and so on. Call amdgpu_device_pre_asic_reset()... etc.
> >> - [AG] this is the HW suspend part
> >>
> >> Step 2: call down_write(&adev->reset_sem) to take the write lock, which
> >> will block the recovery thread until other threads release their read
> >> locks.
> >>
> >> Is the problem here that HW is suspended while some other threads
> >> that rely on the read side lock still access HW?
> >
> > Exactly that, yes.
> >
> >> Mostly what I am
> >> thinking about are IOCTLs - we can't 'wait for them to complete' but
> >> they might be accessing HW when we start suspend.
> >
> > Well, the bigger problem with the IOCTLs is that I can't seem to
> > properly explain the dependency here.
> >
> > Maybe let me describe it from the other side once more to make it more
> > understandable:
> >
> > Core Linux memory management depends on waiting for dma_fences. Amdgpu's
> > dma_fences in turn depend on the GPU scheduler and the recovery/reset
> > functionality to work properly.
> >
> > So every call to kmalloc() or get_free_page() depends on the amdgpu GPU
> > recover/reset function, and with it on every lock taken in that function.
> >
> > Because of this you can't take any lock under which memory allocation
> > might happen while holding the reset lock.
> >
> > This is a hard and by now very well documented limitation, and Dennis'
> > changes here don't magically remove it.
> >
> > Because of this, taking the reset lock or any other lock used in the GPU
> > reset functionality at the beginning of our IOCTLs will never work
> > correctly.
> >
> > What you need to do is take all locks which depend on the MM before you
> > take the reset lock, e.g. dma_resv, mmap_sem, the VM locks etc., and not
> > call functions like kmalloc(), get_free_page() or dma_fence_wait() while
> > holding the reset lock.
> >
> > I hope that somehow makes it more clear now. We should probably try to
> > merge Daniel's patch set for teaching lockdep to complain about such
> > things sooner or later.
>
> It is now more clear than in the previous discussion. One
> thing that would also be helpful is to see the kernel documentation
> about this limitation - can you maybe point to such documentation in
> the kernel docs or even in the code?

For the annotations:

https://dri.freedesktop.org/docs/drm/driver-api/dma-buf.html?highlight=dma_fence#dma-fence-signalling-annotations

I need to brush up my drm/scheduler annotation patches; those should
hit all the important cases.

Wrt documenting the overall locking hierarchy, the best place is probably:

https://cgit.freedesktop.org/drm/drm/tree/drivers/dma-buf/dma-resv.c#n96

Meaning there's no explicit (kerneldoc) documentation about the
nesting, but we at least try to teach lockdep about them directly.
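
To give a rough idea of what the annotations look like in use, a minimal 
sketch (the surrounding function is made up, the begin/end helpers are 
the ones from drivers/dma-buf/dma-fence.c):

#include <linux/dma-fence.h>

static void example_signalling_path(struct dma_fence *fence)
{
        bool cookie;

        /* everything between begin and end is treated as part of the
         * fence signalling critical section, so lockdep complains if
         * we allocate memory or take a forbidden lock in here
         */
        cookie = dma_fence_begin_signalling();

        dma_fence_signal(fence);

        dma_fence_end_signalling(cookie);
}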

Cheers, Daniel

> Andrey
>
>
> >
> > With those patches merged, lockdep starts to complain about the problems
> > as soon as you try to run a kernel with them enabled.
> >
> > Regards,
> > Christian.
> >
> >>
> >> Andrey
> >>
> >>
> >>>
> >>> Christian.
> >>>
> >>> Am 09.03.21 um 17:40 schrieb Andrey Grodzovsky:
> >>>> Because he takes the write side lock post amdgpu_pre_asic_reset -
> >>>> where the HW suspend sequence happens (touching registers) - so I
> >>>> think it will assert.
> >>>>
> >>>> Andrey
> >>>>
> >>>> On 2021-03-09 7:56 a.m., Christian König wrote:
> >>>>> Hi Dennis,
> >>>>>
> >>>>> why do you think that this will always assert in the reset thread?
> >>>>>
> >>>>> In the reset thread, while we are holding the write side of the
> >>>>> reset lock, lockdep_assert_held() should be satisfied and not cause
> >>>>> any splat in the system log.
> >>>>>
> >>>>> Regards,
> >>>>> Christian.
> >>>>>
> >>>>> Am 09.03.21 um 03:03 schrieb Li, Dennis:
> >>>>>> [AMD Official Use Only - Internal Distribution Only]
> >>>>>>
> >>>>>> Hi, Christian,
> >>>>>>         amdgpu_device_skip_hw_access will always assert in reset
> >>>>>> thread, which seems not a good idea.
> >>>>>>
> >>>>>> Best Regards
> >>>>>> Dennis Li



-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-09 18:26         ` Andrey Grodzovsky
  2021-03-09 18:51           ` Christian König
@ 2021-03-10  2:59           ` Li, Dennis
  2021-03-10  5:42             ` Andrey Grodzovsky
  1 sibling, 1 reply; 16+ messages in thread
From: Li, Dennis @ 2021-03-10  2:59 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Christian König, amd-gfx

[AMD Official Use Only - Internal Distribution Only]

Hi, Andrey,
>>> Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.
On the read side, when the reader has taken the read lock, it will also check whether adev->in_gpu_reset is 1; if so, it will release the read lock and wait for the recovery-finished event.
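
Roughly this pattern, as a sketch (the wait queue name and the exact 
error handling are made up for illustration):

static int example_read_side(struct amdgpu_device *adev)
{
        down_read(&adev->reset_sem);
        if (atomic_read(&adev->in_gpu_reset)) {
                /* a reset is in flight: drop the read lock and wait
                 * for the recovery thread to finish
                 */
                up_read(&adev->reset_sem);
                wait_event(adev->recovery_done_wq, /* hypothetical */
                           !atomic_read(&adev->in_gpu_reset));
                return -EAGAIN;
        }
        /* ... hardware access under the read lock ... */
        up_read(&adev->reset_sem);
        return 0;
}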

Best Regards
Dennis Li

-----Original Message-----
From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Sent: Wednesday, March 10, 2021 2:26 AM
To: Christian König <ckoenig.leichtzumerken@gmail.com>; Li, Dennis <Dennis.Li@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2

On 2021-03-09 12:47 p.m., Christian König wrote:
> No it won't. Accessing the hardware without the lock is ok as long as
> the write side isn't taken.

Oh, forgot about the trylock part, sorry...

>
> But that approach is illegal anyway because we suspend the hardware
> without proper protection from concurrent access.

For my understanding and from looking again at his steps related to this

Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects from other TDR threads

Step 1: cancel all delayed works, stop the drm scheduler, complete all unreceived fences and so on. Call amdgpu_device_pre_asic_reset()... etc.
- [AG] this is the HW suspend part

Step 2: call down_write(&adev->reset_sem) to take the write lock, which will block the recovery thread until other threads release their read locks.

Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.

Andrey


>
> Christian.
>
> Am 09.03.21 um 17:40 schrieb Andrey Grodzovsky:
>> Because he takes the write side lock post amdgpu_pre_asic_reset -
>> where the HW suspend sequence happens (touching registers) - so I
>> think it will assert.
>>
>> Andrey
>>
>> On 2021-03-09 7:56 a.m., Christian König wrote:
>>> Hi Dennis,
>>>
>>> why do you think that this will always assert in the reset thread?
>>>
>>> In the reset thread, while we are holding the write side of the reset
>>> lock, lockdep_assert_held() should be satisfied and not cause any
>>> splat in the system log.
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 09.03.21 um 03:03 schrieb Li, Dennis:
>>>> [AMD Official Use Only - Internal Distribution Only]
>>>>
>>>> Hi, Christian,
>>>>         amdgpu_device_skip_hw_access will always assert in reset
>>>> thread, which seems not a good idea.
>>>>
>>>> Best Regards
>>>> Dennis Li
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-09 12:56   ` Christian König
  2021-03-09 16:40     ` Andrey Grodzovsky
@ 2021-03-10  3:12     ` Li, Dennis
  1 sibling, 0 replies; 16+ messages in thread
From: Li, Dennis @ 2021-03-10  3:12 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: Grodzovsky, Andrey

[AMD Official Use Only - Internal Distribution Only]

Hi, Christian,
      Because register r/w functions are also used in ISRs, we need to add an in_irq() check. I updated this change in v3.
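
Something along these lines, as a sketch of how the v3 check might look 
(v3 itself is not quoted here, so the exact placement of the in_irq() 
check is a guess):

static bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
        if (adev->in_pci_err_recovery)
                return true;

#ifdef CONFIG_LOCKDEP
        /* the reset_sem trylock dance must not be done from interrupt
         * context, so skip the lockdep check when called from an ISR
         */
        if (!in_irq()) {
                if (down_read_trylock(&adev->reset_sem))
                        up_read(&adev->reset_sem);
                else
                        lockdep_assert_held(&adev->reset_sem);
        }
#endif

        return false;
}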

Best Regards
Dennis Li
-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Tuesday, March 9, 2021 8:57 PM
To: Li, Dennis <Dennis.Li@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2

Hi Dennis,

why do you think that this will always assert in the reset thread?

In the reset thread, while we are holding the write side of the reset lock,
lockdep_assert_held() should be satisfied and not cause any splat in the system log.

Regards,
Christian.

Am 09.03.21 um 03:03 schrieb Li, Dennis:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi, Christian,
>         amdgpu_device_skip_hw_access will always assert in reset thread, which seems not a good idea.
>
> Best Regards
> Dennis Li

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-10  2:59           ` Li, Dennis
@ 2021-03-10  5:42             ` Andrey Grodzovsky
  2021-03-10  6:40               ` Li, Dennis
  0 siblings, 1 reply; 16+ messages in thread
From: Andrey Grodzovsky @ 2021-03-10  5:42 UTC (permalink / raw)
  To: Li, Dennis, Christian König, amd-gfx

But how will this help if the TDR thread starts after you have both taken
the read lock and checked that adev->in_gpu_reset is false? Since TDR now
takes the write lock only after suspending the HW and waiting for all
fences, there is nothing that prevents both threads (e.g. an IOCTL and
TDR) from accessing registers concurrently.
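
Roughly the interleaving I mean (illustrative timeline, not from any 
patch):

        IOCTL thread                      TDR thread
        down_read(&adev->reset_sem)
        sees in_gpu_reset == 0
                                          atomic_cmpxchg(&adev->in_gpu_reset, 0, 1)
                                          suspend HW (step 1)
        register access   <-- races with the HW suspend
        up_read(&adev->reset_sem)
                                          down_write(&adev->reset_sem) (step 2)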

Andrey

On 2021-03-09 9:59 p.m., Li, Dennis wrote:
> [AMD Official Use Only - Internal Distribution Only]
> 
> Hi, Andrey,
>>>> Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.
> On the read side, when a reader holds the read lock, it also checks whether adev->in_gpu_reset is 1; if so, it releases the read lock and waits for the recovery-finished event.


> 
> Best Regards
> Dennis Li
> 
> -----Original Message-----
> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Sent: Wednesday, March 10, 2021 2:26 AM
> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Li, Dennis <Dennis.Li@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
> 
> On 2021-03-09 12:47 p.m., Christian König wrote:
>> No it won't. Accessing the hardware without the lock is ok as long as
>> the write side isn't taken.
> 
> Oh, forgot about the trylock part, sorry...
> 
>>
>> But that approach is illegal anyway because we suspend the hardware
>> without proper protection from concurrent access.
> 
> For my understanding and from looking again at his steps related to this
> 
> Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects from other TDR threads
> 
> Step 1: cancel all delayed work, stop the drm scheduler, complete all unreceived fences and so on. Call amdgpu_device_pre_asic_reset()... etc.
> - [AG] this is the HW suspend part
> 
> Step 2: call down_write(&adev->reset_sem) to hold the write lock, which will block the recovery thread until other threads release their read locks.
> 
> Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.
> 
> Andrey
> 
> 
>>
>> Christian.
>>
>> On 09.03.21 at 17:40, Andrey Grodzovsky wrote:
>>> Because he takes the write side lock post amdgpu_pre_asic_reset -
>>> where the HW suspend sequence happens (touching registers) - so I think
>>> it will assert.
>>>
>>> Andrey
>>>
>>> On 2021-03-09 7:56 a.m., Christian König wrote:
>>>> Hi Dennis,
>>>>
>>>> why do you think that this will always assert in the reset thread?
>>>>
>>>> In the reset thread, while we are holding the write side of the
>>>> reset lock, lockdep_assert_held() should be satisfied and not cause
>>>> any splat in the system log.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>> Am 09.03.21 um 03:03 schrieb Li, Dennis:
>>>>> [AMD Official Use Only - Internal Distribution Only]
>>>>>
>>>>> Hi, Christian,
>>>>>          amdgpu_device_skip_hw_access() will always assert in the
>>>>> reset thread, which does not seem like a good idea.
>>>>>
>>>>> Best Regards
>>>>> Dennis Li
>>>>> -----Original Message-----
>>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>> Sent: Tuesday, March 9, 2021 2:07 AM
>>>>> To: amd-gfx@lists.freedesktop.org
>>>>> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Li, Dennis
>>>>> <Dennis.Li@amd.com>
>>>>> Subject: [PATCH] drm/amdgpu: capture invalid hardware access v2
>>>>>
>>>>> From: Dennis Li <Dennis.Li@amd.com>
>>>>>
>>>>> When the recovery thread has begun a GPU reset, no other threads
>>>>> should access the hardware; otherwise the system will randomly hang.
>>>>>
>>>>> v2 (chk): rewritten from scratch, use trylock and lockdep instead
>>>>> of
>>>>>     hand wiring the logic.
>>>>>
>>>>> Signed-off-by: Dennis Li <Dennis.Li@amd.com>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> [quoted patch body snipped]
>>>>
>>

* RE: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-10  5:42             ` Andrey Grodzovsky
@ 2021-03-10  6:40               ` Li, Dennis
  2021-03-10 13:03                 ` Christian König
  0 siblings, 1 reply; 16+ messages in thread
From: Li, Dennis @ 2021-03-10  6:40 UTC (permalink / raw)
  To: Grodzovsky, Andrey, Christian König, amd-gfx

[AMD Official Use Only - Internal Distribution Only]

>>> But how will this help if the TDR thread starts after you have both taken the read lock and checked that adev->in_gpu_reset is false? Since TDR now takes the write lock only after suspending the HW and waiting for all fences, there is nothing that prevents the two threads (e.g. an IOCTL and TDR) from accessing registers concurrently.
When the read thread has both taken the read lock and checked that adev->in_gpu_reset is false, it means the recovery thread has not started yet, so the read thread will continue; it will either exit normally or exit because its fence was completed by the pre-reset functions. I assumed that the pre-reset functions could complete all fences, which would make the related threads exit and release their read locks. Christian has a different view on this, and I am trying to understand his concern. TDR will be blocked until all read locks are released.
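
As a rough sketch of the read-side protocol I mean (recovery_done_wq is
an illustrative name here, not the driver's actual field):

/* sketch; assumes amdgpu.h plus <linux/wait.h> */
static void reader_lock_for_hw_access(struct amdgpu_device *adev)
{
	down_read(&adev->reset_sem);
	while (atomic_read(&adev->in_gpu_reset)) {
		/* back off so the recovery thread can make progress */
		up_read(&adev->reset_sem);
		wait_event(adev->recovery_done_wq,
			   !atomic_read(&adev->in_gpu_reset));
		down_read(&adev->reset_sem);
	}
	/* hardware access is safe from here until the matching up_read() */
}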

Best Regards
Dennis Li
-----Original Message-----
From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Sent: Wednesday, March 10, 2021 1:42 PM
To: Li, Dennis <Dennis.Li@amd.com>; Christian König <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2

But how will this help if the TDR thread starts after you have both taken the read lock and checked that adev->in_gpu_reset is false? Since TDR now takes the write lock only after suspending the HW and waiting for all fences, there is nothing that prevents the two threads (e.g. an IOCTL and TDR) from accessing registers concurrently.

Andrey

On 2021-03-09 9:59 p.m., Li, Dennis wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi, Andrey,
>>>> Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.
> On the read side, when a reader holds the read lock, it also checks whether adev->in_gpu_reset is 1; if so, it releases the read lock and waits for the recovery-finished event.


>
> Best Regards
> Dennis Li
>
> -----Original Message-----
> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Sent: Wednesday, March 10, 2021 2:26 AM
> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Li, Dennis
> <Dennis.Li@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
>
> On 2021-03-09 12:47 p.m., Christian König wrote:
>> No it won't. Accessing the hardware without the lock is ok as long as
>> the write side isn't taken.
>
> Oh, forgot about the trylock part, sorry...
>
>>
>> But that approach is illegal anyway because we suspend the hardware
>> without proper protection from concurrent access.
>
> For my understanding and from looking again at his steps related to
> this
>
> Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects from
> other TDR threads
>
> Step 1: cancel all delayed work, stop the drm scheduler, complete all
> unreceived fences and so on. Call amdgpu_device_pre_asic_reset()...
> etc.
> - [AG] this is the HW suspend part
>
> Step 2: call down_write(&adev->reset_sem) to hold the write lock, which will block the recovery thread until other threads release their read locks.
>
> Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.
>
> Andrey
>
>
>>
>> Christian.
>>
>> On 09.03.21 at 17:40, Andrey Grodzovsky wrote:
>>> Because he takes the write side lock post amdgpu_pre_asic_reset -
>>> where the HW suspend sequence happens (touching registers) - so I think
>>> it will assert.
>>>
>>> Andrey
>>>
>>> On 2021-03-09 7:56 a.m., Christian König wrote:
>>>> Hi Dennis,
>>>>
>>>> why do you think that this will always assert in the reset thread?
>>>>
>>>> In the reset thread, while we are holding the write side of the
>>>> reset lock, lockdep_assert_held() should be satisfied and not cause
>>>> any splat in the system log.
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>> On 09.03.21 at 03:03, Li, Dennis wrote:
>>>>> [AMD Official Use Only - Internal Distribution Only]
>>>>>
>>>>> Hi, Christian,
>>>>>          amdgpu_device_skip_hw_access() will always assert in the
>>>>> reset thread, which does not seem like a good idea.
>>>>>
>>>>> Best Regards
>>>>> Dennis Li
>>>>> -----Original Message-----
>>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>> Sent: Tuesday, March 9, 2021 2:07 AM
>>>>> To: amd-gfx@lists.freedesktop.org
>>>>> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Li, Dennis
>>>>> <Dennis.Li@amd.com>
>>>>> Subject: [PATCH] drm/amdgpu: capture invalid hardware access v2
>>>>>
>>>>> From: Dennis Li <Dennis.Li@amd.com>
>>>>>
>>>>> When the recovery thread has begun a GPU reset, no other threads
>>>>> should access the hardware; otherwise the system will randomly hang.
>>>>>
>>>>> v2 (chk): rewritten from scratch, use trylock and lockdep instead
>>>>> of
>>>>>     hand wiring the logic.
>>>>>
>>>>> Signed-off-by: Dennis Li <Dennis.Li@amd.com>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> [quoted patch body snipped]
>>>>
>>

* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-10  6:40               ` Li, Dennis
@ 2021-03-10 13:03                 ` Christian König
  2021-03-10 13:19                   ` Li, Dennis
  0 siblings, 1 reply; 16+ messages in thread
From: Christian König @ 2021-03-10 13:03 UTC (permalink / raw)
  To: Li, Dennis, Grodzovsky, Andrey, amd-gfx

> I assumed that the pre-reset functions could complete all fences

And exactly that's incorrect.

We can only complete the low-level hardware fences, but not the
scheduler fences, since those need to wait for the resubmission of the
jobs and with it for the GPU reset to finish.
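
As a simplified sketch of that dependency (the example_* names are
illustrative, not the literal scheduler code):

/* sketch; assumes <linux/dma-fence.h> and <drm/gpu_scheduler.h> */
struct example_sched_job {
	struct dma_fence *hw_fence;	 /* ring fence, can be force-signaled */
	struct drm_sched_fence *s_fence; /* what user space actually waits on */
};

static void example_job_done(struct example_sched_job *job)
{
	/*
	 * The finished fence may only signal after the job really ran,
	 * which after a reset means after resubmission - and therefore
	 * after the reset itself has completed.
	 */
	drm_sched_fence_finished(job->s_fence);
}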

Regards,
Christian.

On 10.03.21 at 07:40, Li, Dennis wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
>>>> But how will this help if the TDR thread starts after you have both taken the read lock and checked that adev->in_gpu_reset is false? Since TDR now takes the write lock only after suspending the HW and waiting for all fences, there is nothing that prevents the two threads (e.g. an IOCTL and TDR) from accessing registers concurrently.
> When the read thread has both taken the read lock and checked that adev->in_gpu_reset is false, it means the recovery thread has not started yet, so the read thread will continue; it will either exit normally or exit because its fence was completed by the pre-reset functions. I assumed that the pre-reset functions could complete all fences, which would make the related threads exit and release their read locks. Christian has a different view on this, and I am trying to understand his concern. TDR will be blocked until all read locks are released.
>
> Best Regards
> Dennis Li
> -----Original Message-----
> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Sent: Wednesday, March 10, 2021 1:42 PM
> To: Li, Dennis <Dennis.Li@amd.com>; Christian König <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
>
> But how will this help if the TDR thread starts after you have both taken the read lock and checked that adev->in_gpu_reset is false? Since TDR now takes the write lock only after suspending the HW and waiting for all fences, there is nothing that prevents the two threads (e.g. an IOCTL and TDR) from accessing registers concurrently.
>
> Andrey
>
> On 2021-03-09 9:59 p.m., Li, Dennis wrote:
>> [AMD Official Use Only - Internal Distribution Only]
>>
>> Hi, Andrey,
>>>>> Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.
>> On the read side, when a reader holds the read lock, it also checks whether adev->in_gpu_reset is 1; if so, it releases the read lock and waits for the recovery-finished event.
>
>> Best Regards
>> Dennis Li
>>
>> -----Original Message-----
>> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>> Sent: Wednesday, March 10, 2021 2:26 AM
>> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Li, Dennis
>> <Dennis.Li@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
>>
>> On 2021-03-09 12:47 p.m., Christian König wrote:
>>> No it won't. Accessing the hardware without the lock is ok as long as
>>> the write side isn't taken.
>> Oh, forgot about the trylock part, sorry...
>>
>>> But that approach is illegal anyway because we suspend the hardware
>>> without proper protection from concurrent access.
>> For my understanding and from looking again at his steps related to
>> this
>>
>> Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects from
>> other TDR threads
>>
>> Step 1: cancel all delayed work, stop the drm scheduler, complete all
>> unreceived fences and so on. Call amdgpu_device_pre_asic_reset()...
>> etc.
>> - [AG] this is the HW suspend part
>>
>> Step 2: call down_write(&adev->reset_sem) to hold the write lock, which will block the recovery thread until other threads release their read locks.
>>
>> Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.
>>
>> Andrey
>>
>>
>>> Christian.
>>>
>>> On 09.03.21 at 17:40, Andrey Grodzovsky wrote:
>>>> Because he takes the write side lock post amdgpu_pre_asic_reset -
>>>> where the HW suspend sequence happens (touching registers) - so I think
>>>> it will assert.
>>>>
>>>> Andrey
>>>>
>>>> On 2021-03-09 7:56 a.m., Christian König wrote:
>>>>> Hi Dennis,
>>>>>
>>>>> why do you think that this will always assert in the reset thread?
>>>>>
>>>>> In the reset thread, while we are holding the write side of the
>>>>> reset lock, lockdep_assert_held() should be satisfied and not cause
>>>>> any splat in the system log.
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>> On 09.03.21 at 03:03, Li, Dennis wrote:
>>>>>> [AMD Official Use Only - Internal Distribution Only]
>>>>>>
>>>>>> Hi, Christian,
>>>>>>           amdgpu_device_skip_hw_access() will always assert in the
>>>>>> reset thread, which does not seem like a good idea.
>>>>>>
>>>>>> Best Regards
>>>>>> Dennis Li
>>>>>> -----Original Message-----
>>>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>>> Sent: Tuesday, March 9, 2021 2:07 AM
>>>>>> To: amd-gfx@lists.freedesktop.org
>>>>>> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Li, Dennis
>>>>>> <Dennis.Li@amd.com>
>>>>>> Subject: [PATCH] drm/amdgpu: capture invalid hardware access v2
>>>>>>
>>>>>> From: Dennis Li <Dennis.Li@amd.com>
>>>>>>
>>>>>> When the recovery thread has begun a GPU reset, no other threads
>>>>>> should access the hardware; otherwise the system will randomly hang.
>>>>>>
>>>>>> v2 (chk): rewritten from scratch, use trylock and lockdep instead
>>>>>> of
>>>>>>      hand wiring the logic.
>>>>>>
>>>>>> Signed-off-by: Dennis Li <Dennis.Li@amd.com>
>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>> [quoted patch body snipped]


* RE: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-10 13:03                 ` Christian König
@ 2021-03-10 13:19                   ` Li, Dennis
  2021-03-10 13:41                     ` Christian König
  0 siblings, 1 reply; 16+ messages in thread
From: Li, Dennis @ 2021-03-10 13:19 UTC (permalink / raw)
  To: Christian König, Grodzovsky, Andrey, amd-gfx

[AMD Official Use Only - Internal Distribution Only]

Hi, Christian,

>> We can only complete the low-level hardware fences, but not the scheduler fences, since those need to wait for the resubmission of the jobs and with it for the GPU reset to finish.
Correct. I ran a deadlock test and captured this case today. Is there a chance to let the driver complete the scheduler fences? If we decide to do a GPU reset, the pending rendering jobs should in fact be recreated anyway because of the VRAM loss. Currently, user applications recreate their context when the driver reports that VRAM was lost.
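
For reference, this is roughly how user space detects that today - a
sketch based on the amdgpu context-query uAPI (fd and ctx_id are
illustrative parameters, error handling is simplified):

#include <stdbool.h>
#include <stdint.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

static bool vram_lost(int fd, uint32_t ctx_id)
{
	union drm_amdgpu_ctx args = {0};

	args.in.op = AMDGPU_CTX_OP_QUERY_STATE2;
	args.in.ctx_id = ctx_id;

	if (drmCommandWriteRead(fd, DRM_AMDGPU_CTX, &args, sizeof(args)))
		return false;	/* query failed; assume context still valid */

	return args.out.state.flags & AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;
}

On a positive result the application then drops and recreates its
context and everything submitted through it.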

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: Wednesday, March 10, 2021 9:03 PM
To: Li, Dennis <Dennis.Li@amd.com>; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2

> I assumed that the pre-reset functions could complete all fences

And exactly that's incorrect.

We can only complete the low-level hardware fences, but not the scheduler fences, since those need to wait for the resubmission of the jobs and with it for the GPU reset to finish.

Regards,
Christian.

On 10.03.21 at 07:40, Li, Dennis wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
>>>> But how will this help if the TDR thread starts after you have both taken the read lock and checked that adev->in_gpu_reset is false? Since TDR now takes the write lock only after suspending the HW and waiting for all fences, there is nothing that prevents the two threads (e.g. an IOCTL and TDR) from accessing registers concurrently.
> When the read thread has both taken the read lock and checked that adev->in_gpu_reset is false, it means the recovery thread has not started yet, so the read thread will continue; it will either exit normally or exit because its fence was completed by the pre-reset functions. I assumed that the pre-reset functions could complete all fences, which would make the related threads exit and release their read locks. Christian has a different view on this, and I am trying to understand his concern. TDR will be blocked until all read locks are released.
>
> Best Regards
> Dennis Li
> -----Original Message-----
> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Sent: Wednesday, March 10, 2021 1:42 PM
> To: Li, Dennis <Dennis.Li@amd.com>; Christian König
> <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
>
> But how will this help if the TDR thread starts after you have both taken the read lock and checked that adev->in_gpu_reset is false? Since TDR now takes the write lock only after suspending the HW and waiting for all fences, there is nothing that prevents the two threads (e.g. an IOCTL and TDR) from accessing registers concurrently.
>
> Andrey
>
> On 2021-03-09 9:59 p.m., Li, Dennis wrote:
>> [AMD Official Use Only - Internal Distribution Only]
>>
>> Hi, Andrey,
>>>>> Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.
>> On the read side, when a reader holds the read lock, it also checks whether adev->in_gpu_reset is 1; if so, it releases the read lock and waits for the recovery-finished event.
>
>> Best Regards
>> Dennis Li
>>
>> -----Original Message-----
>> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>> Sent: Wednesday, March 10, 2021 2:26 AM
>> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Li, Dennis
>> <Dennis.Li@amd.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
>>
>> On 2021-03-09 12:47 p.m., Christian König wrote:
>>> No it won't. Accessing the hardware without the lock is ok as long
>>> as the write side isn't taken.
>> Oh, forgot about the trylock part, sorry...
>>
>>> But that approach is illegal anyway because we suspend the hardware
>>> without proper protection from concurrent access.
>> For my understanding and from looking again at his steps related to
>> this
>>
>> Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects
>> from other TDR threads
>>
>> Step 1: cancel all delayed work, stop the drm scheduler, complete all
>> unreceived fences and so on. Call amdgpu_device_pre_asic_reset()...
>> etc.
>> - [AG] this is the HW suspend part
>>
>> Step 2: call down_write(&adev->reset_sem) to hold the write lock, which will block the recovery thread until other threads release their read locks.
>>
>> Is the problem here that HW is suspended while some other threads that rely on the read side lock still access HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete' but they might be accessing HW when we start suspend.
>>
>> Andrey
>>
>>
>>> Christian.
>>>
>>> On 09.03.21 at 17:40, Andrey Grodzovsky wrote:
>>>> Because he takes the write side lock post amdgpu_pre_asic_reset -
>>>> where the HW suspend sequence happens (touching registers) - so I think
>>>> it will assert.
>>>>
>>>> Andrey
>>>>
>>>> On 2021-03-09 7:56 a.m., Christian König wrote:
>>>>> Hi Dennis,
>>>>>
>>>>> why do you think that this will always assert in the reset thread?
>>>>>
>>>>> In the reset thread, while we are holding the write side of the
>>>>> reset lock, lockdep_assert_held() should be satisfied and not cause
>>>>> any splat in the system log.
>>>>>
>>>>> Regards,
>>>>> Christian.
>>>>>
>>>>> On 09.03.21 at 03:03, Li, Dennis wrote:
>>>>>> [AMD Official Use Only - Internal Distribution Only]
>>>>>>
>>>>>> Hi, Christian,
>>>>>>           amdgpu_device_skip_hw_access() will always assert in the
>>>>>> reset thread, which does not seem like a good idea.
>>>>>>
>>>>>> Best Regards
>>>>>> Dennis Li
>>>>>> -----Original Message-----
>>>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>>> Sent: Tuesday, March 9, 2021 2:07 AM
>>>>>> To: amd-gfx@lists.freedesktop.org
>>>>>> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Li, Dennis
>>>>>> <Dennis.Li@amd.com>
>>>>>> Subject: [PATCH] drm/amdgpu: capture invalid hardware access v2
>>>>>>
>>>>>> From: Dennis Li <Dennis.Li@amd.com>
>>>>>>
>>>>>> When the recovery thread has begun a GPU reset, no other threads
>>>>>> should access the hardware; otherwise the system will randomly hang.
>>>>>>
>>>>>> v2 (chk): rewritten from scratch, use trylock and lockdep instead
>>>>>> of
>>>>>>      hand wiring the logic.
>>>>>>
>>>>>> Signed-off-by: Dennis Li <Dennis.Li@amd.com>
>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>> [quoted patch body snipped]


* Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
  2021-03-10 13:19                   ` Li, Dennis
@ 2021-03-10 13:41                     ` Christian König
  0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2021-03-10 13:41 UTC (permalink / raw)
  To: Li, Dennis, Grodzovsky, Andrey, amd-gfx

> Is there a chance to let the driver complete the scheduler fences?

Nope, see the documentation Daniel linked in this thread.

If we wanted to complete the scheduler fences and release the memory
protected by them, we would need to completely disable the device.

E.g. similar to a hotplug event when you rip out the hardware and put it 
back in again.

That would mean that applications would not only need to reinitialize
their contexts, but also completely restart and reallocate their memory.
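
The reason is that memory protected by a fence may only be released once
that fence has signaled, roughly like this (example_buffer is an
illustrative structure, not the real TTM code path):

/* sketch; assumes <linux/dma-fence.h> and <linux/slab.h> */
struct example_buffer {
	struct dma_fence *fence;	/* last scheduler fence using the buffer */
	void *backing;
};

static void example_buffer_free(struct example_buffer *buf)
{
	/* freeing before the fence signals would be a use-after-free */
	dma_fence_wait(buf->fence, false);
	dma_fence_put(buf->fence);
	kfree(buf->backing);
}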

Thinking more about it, if we went down that route we wouldn't even need
the reset lock any more in the first place :)

Something else you should keep in mind is that we have already tried
this approach two times now, and both times we came to the same
conclusion after spending a lot of work trying to fix it up.

Christian.

On 10.03.21 at 14:19, Li, Dennis wrote:
> [AMD Official Use Only - Internal Distribution Only]
>
> Hi, Christian,
>
>>> We can only complete the low-level hardware fences, but not the scheduler fences, since those need to wait for the resubmission of the jobs and with it for the GPU reset to finish.
> Correct. I ran a deadlock test and captured this case today. Is there a chance to let the driver complete the scheduler fences? If we decide to do a GPU reset, the pending rendering jobs should in fact be recreated anyway because of the VRAM loss. Currently, user applications recreate their context when the driver reports that VRAM was lost.
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Wednesday, March 10, 2021 9:03 PM
> To: Li, Dennis <Dennis.Li@amd.com>; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
>
>> I assumed that the pre-reset functions could complete all fences
> And exactly that's incorrect.
>
> We can only complete the low-level hardware fences, but not the scheduler fences, since those need to wait for the resubmission of the jobs and with it for the GPU reset to finish.
>
> Regards,
> Christian.
>
On 10.03.21 at 07:40, Li, Dennis wrote:
>> [AMD Official Use Only - Internal Distribution Only]
>>
>>>>> But how will this help if the TDR thread starts after you have both taken the read lock and checked that adev->in_gpu_reset is false? Since TDR now takes the write lock only after suspending the HW and waiting for all fences, there is nothing that prevents the two threads (e.g. an IOCTL and TDR) from accessing registers concurrently.
>> When the read thread has both taken the read lock and checked that adev->in_gpu_reset is false, it means that the recovery thread has not started yet, so the read thread will continue; it will either exit normally or exit because its fence was completed by the pre-reset functions. I assumed that the pre-reset functions could complete all fences, which would make the related threads exit and release their read locks. Christian has a different view on this, and I am trying to understand his concern. TDR will be blocked until all read locks are released.
>>
>> Best Regards
>> Dennis Li
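
The read-side pattern Dennis describes would look roughly like this (a
sketch only; the wait queue name is assumed, it is not part of the posted
patch):

	down_read(&adev->reset_sem);
	if (atomic_read(&adev->in_gpu_reset)) {
		/* recovery already started: back off, wait for it to finish */
		up_read(&adev->reset_sem);
		wait_event(adev->reset_wq,      /* assumed name */
			   !atomic_read(&adev->in_gpu_reset));
		return -EAGAIN;                 /* caller retries after recovery */
	}
	/* ... hardware access here; TDR blocks on the write side ... */
	up_read(&adev->reset_sem);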
>> -----Original Message-----
>> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>> Sent: Wednesday, March 10, 2021 1:42 PM
>> To: Li, Dennis <Dennis.Li@amd.com>; Christian König
>> <ckoenig.leichtzumerken@gmail.com>; amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
>>
>> But how will this help if the TDR thread starts after you have both taken the read lock and checked that adev->in_gpu_reset is false? Since TDR now takes the write lock only after suspending the HW and waiting for all fences, there is nothing that prevents both threads (e.g. an IOCTL and TDR) from accessing registers concurrently.
>>
>> Andrey
>>
>> On 2021-03-09 9:59 p.m., Li, Dennis wrote:
>>>
>>> Hi, Andrey,
>>>>>> Is the problem here that the HW is suspended while some other threads that rely on the read side of the lock still access the HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete', but they might be accessing the HW when we start the suspend.
>>> On the read side, when the reader holds the read lock, it also checks whether adev->in_gpu_reset is 1; if so, it releases the read lock and waits for the recovery-finished event.
>>> Best Regards
>>> Dennis Li
>>>
>>> -----Original Message-----
>>> From: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>>> Sent: Wednesday, March 10, 2021 2:26 AM
>>> To: Christian König <ckoenig.leichtzumerken@gmail.com>; Li, Dennis
>>> <Dennis.Li@amd.com>; amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH] drm/amdgpu: capture invalid hardware access v2
>>>
>>> On 2021-03-09 12:47 p.m., Christian König wrote:
>>>> No it won't. Accessing the hardware without the lock is ok as long
>>>> as the write side isn't taken.
>>> Oh, forgot about the trylock part, sorry...
>>>
>>>> But that approach is illegal anyway because we suspend the hardware
>>>> without proper protection from concurrent access.
>>> For my understanding, and from looking again at his steps related to
>>> this:
>>>
>>> Step 0: atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) - [AG] protects from
>>> other TDR threads
>>>
>>> Step 1: cancel all delayed works, stop the drm scheduler, complete all
>>> unreceived fences and so on; call amdgpu_device_pre_asic_reset... etc.
>>> - [AG] this is the HW suspend part
>>>
>>> Step 2: call down_write(&adev->reset_sem) to hold the write lock, which will block the recovery thread until the other threads release their read locks.
>>>
>>> Is the problem here that the HW is suspended while some other threads that rely on the read side of the lock still access the HW? Mostly what I am thinking about are IOCTLs - we can't 'wait for them to complete', but they might be accessing the HW when we start the suspend.
>>>
>>> Andrey
>>>
>>>
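
Condensed into code, the ordering being questioned is roughly (a sketch of
the steps above; arguments abbreviated):

	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1))  /* Step 0: only one TDR */
		return 0;

	amdgpu_device_pre_asic_reset(adev, ...);        /* Step 1: suspends the HW,
	                                                 * registers are touched here */

	down_write(&adev->reset_sem);                   /* Step 2: readers are
	                                                 * excluded only from
	                                                 * this point on */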
>>>> Christian.
>>>>
>>>> On 09.03.21 at 17:40, Andrey Grodzovsky wrote:
>>>>> Because he takes the write side of the lock post amdgpu_pre_asic_reset -
>>>>> where the HW suspend sequence happens (touching registers) - so I think
>>>>> it will assert.
>>>>>
>>>>> Andrey
>>>>>
>>>>> On 2021-03-09 7:56 a.m., Christian König wrote:
>>>>>> Hi Dennis,
>>>>>>
>>>>>> why do you think that this will always assert in the reset thread?
>>>>>>
>>>>>> In the reset thread, while we are holding the write side of the reset
>>>>>> lock, lockdep_assert_held() should be satisfied and not cause any splat
>>>>>> in the system log.
>>>>>>
>>>>>> Regards,
>>>>>> Christian.
>>>>>>
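
Spelled out, the reason the check stays quiet in the reset thread itself is
roughly this (sketch):

	down_write(&adev->reset_sem);       /* reset thread owns the write side */

	/* every register helper called from the reset path now runs: */
	if (down_read_trylock(&adev->reset_sem))
		up_read(&adev->reset_sem);              /* no reset in parallel */
	else
		lockdep_assert_held(&adev->reset_sem);  /* trylock failed because
		                                         * this very thread holds
		                                         * the write side; the
		                                         * assert is satisfied,
		                                         * no splat */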
>>>>>> On 09.03.21 at 03:03, Li, Dennis wrote:
>>>>>>>
>>>>>>> Hi, Christian,
>>>>>>>            amdgpu_device_skip_hw_access will always assert in the
>>>>>>> reset thread, which does not seem like a good idea.
>>>>>>>
>>>>>>> Best Regards
>>>>>>> Dennis Li
>>>>>>> -----Original Message-----
>>>>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>>>> Sent: Tuesday, March 9, 2021 2:07 AM
>>>>>>> To: amd-gfx@lists.freedesktop.org
>>>>>>> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; Li, Dennis
>>>>>>> <Dennis.Li@amd.com>
>>>>>>> Subject: [PATCH] drm/amdgpu: capture invalid hardware access v2
>>>>>>>
>>>>>>> From: Dennis Li <Dennis.Li@amd.com>
>>>>>>>
>>>>>>> When the recovery thread has begun a GPU reset, no other threads
>>>>>>> should access the hardware, otherwise the system randomly hangs.
>>>>>>>
>>>>>>> v2 (chk): rewritten from scratch, use trylock and lockdep instead of
>>>>>>> 	  hand wiring the logic.
>>>>>>>
>>>>>>> Signed-off-by: Dennis Li <Dennis.Li@amd.com>
>>>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>>>> ---
>>>>>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 74 +++++++++++++++++-----
>>>>>>>  1 file changed, 57 insertions(+), 17 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> index e247c3a2ec08..c990af6a43ca 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>>>> @@ -326,6 +326,34 @@ void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
>>>>>>>  /*
>>>>>>>   * register access helper functions.
>>>>>>>   */
>>>>>>> +
>>>>>>> +/* Check if hw access should be skipped because of hotplug or device error */
>>>>>>> +static bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
>>>>>>> +{
>>>>>>> +	if (adev->in_pci_err_recovery)
>>>>>>> +		return true;
>>>>>>> +
>>>>>>> +#ifdef CONFIG_LOCKDEP
>>>>>>> +	/*
>>>>>>> +	 * This is a bit complicated to understand, so worth a comment. What we assert
>>>>>>> +	 * here is that the GPU reset is not running on another thread in parallel.
>>>>>>> +	 *
>>>>>>> +	 * For this we trylock the read side of the reset semaphore, if that succeeds
>>>>>>> +	 * we know that the reset is not running in parallel.
>>>>>>> +	 *
>>>>>>> +	 * If the trylock fails we assert that we are either already holding the read
>>>>>>> +	 * side of the lock or are the reset thread itself and hold the write side of
>>>>>>> +	 * the lock.
>>>>>>> +	 */
>>>>>>> +	if (down_read_trylock(&adev->reset_sem))
>>>>>>> +		up_read(&adev->reset_sem);
>>>>>>> +	else
>>>>>>> +		lockdep_assert_held(&adev->reset_sem);
>>>>>>> +#endif
>>>>>>> +
>>>>>>> +	return false;
>>>>>>> +}
>>>>>>> +
>>>>>>>  /**
>>>>>>>   * amdgpu_device_rreg - read a memory mapped IO or indirect register
>>>>>>>   *
>>>>>>> @@ -340,7 +368,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>>>>>>>  {
>>>>>>>  	uint32_t ret;
>>>>>>>
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return 0;
>>>>>>>
>>>>>>>  	if ((reg * 4) < adev->rmmio_size) {
>>>>>>> @@ -377,7 +405,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
>>>>>>>   */
>>>>>>>  uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return 0;
>>>>>>>
>>>>>>>  	if (offset < adev->rmmio_size)
>>>>>>> @@ -402,7 +430,7 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
>>>>>>>   */
>>>>>>>  void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return;
>>>>>>>
>>>>>>>  	if (offset < adev->rmmio_size)
>>>>>>> @@ -425,7 +453,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
>>>>>>>  			uint32_t reg, uint32_t v,
>>>>>>>  			uint32_t acc_flags)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return;
>>>>>>>
>>>>>>>  	if ((reg * 4) < adev->rmmio_size) {
>>>>>>> @@ -452,7 +480,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,
>>>>>>>  void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
>>>>>>>  			     uint32_t reg, uint32_t v)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return;
>>>>>>>
>>>>>>>  	if (amdgpu_sriov_fullaccess(adev) &&
>>>>>>> @@ -475,7 +503,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
>>>>>>>   */
>>>>>>>  u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return 0;
>>>>>>>
>>>>>>>  	if ((reg * 4) < adev->rio_mem_size)
>>>>>>> @@ -497,7 +525,7 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
>>>>>>>   */
>>>>>>>  void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return;
>>>>>>>
>>>>>>>  	if ((reg * 4) < adev->rio_mem_size)
>>>>>>> @@ -519,7 +547,7 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
>>>>>>>   */
>>>>>>>  u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return 0;
>>>>>>>
>>>>>>>  	if (index < adev->doorbell.num_doorbells) {
>>>>>>> @@ -542,7 +570,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
>>>>>>>   */
>>>>>>>  void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return;
>>>>>>>
>>>>>>>  	if (index < adev->doorbell.num_doorbells) {
>>>>>>> @@ -563,7 +591,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
>>>>>>>   */
>>>>>>>  u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return 0;
>>>>>>>
>>>>>>>  	if (index < adev->doorbell.num_doorbells) {
>>>>>>> @@ -586,7 +614,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
>>>>>>>   */
>>>>>>>  void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
>>>>>>>  {
>>>>>>> -	if (adev->in_pci_err_recovery)
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>>  		return;
>>>>>>>
>>>>>>>  	if (index < adev->doorbell.num_doorbells) {
>>>>>>> @@ -610,10 +638,13 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
>>>>>>>  				u32 pcie_index, u32 pcie_data,
>>>>>>>  				u32 reg_addr)
>>>>>>>  {
>>>>>>> -	unsigned long flags;
>>>>>>> -	u32 r;
>>>>>>>  	void __iomem *pcie_index_offset;
>>>>>>>  	void __iomem *pcie_data_offset;
>>>>>>> +	unsigned long flags;
>>>>>>> +	u32 r;
>>>>>>> +
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>> +		return 0;
>>>>>>>
>>>>>>>  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>>>>>>>  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
>>>>>>> @@ -641,10 +672,13 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
>>>>>>>  				  u32 pcie_index, u32 pcie_data,
>>>>>>>  				  u32 reg_addr)
>>>>>>>  {
>>>>>>> -	unsigned long flags;
>>>>>>> -	u64 r;
>>>>>>>  	void __iomem *pcie_index_offset;
>>>>>>>  	void __iomem *pcie_data_offset;
>>>>>>> +	unsigned long flags;
>>>>>>> +	u64 r;
>>>>>>> +
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>> +		return 0;
>>>>>>>
>>>>>>>  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>>>>>>>  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
>>>>>>> @@ -677,9 +711,12 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
>>>>>>>  				 u32 pcie_index, u32 pcie_data,
>>>>>>>  				 u32 reg_addr, u32 reg_data)
>>>>>>>  {
>>>>>>> -	unsigned long flags;
>>>>>>>  	void __iomem *pcie_index_offset;
>>>>>>>  	void __iomem *pcie_data_offset;
>>>>>>> +	unsigned long flags;
>>>>>>> +
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>> +		return;
>>>>>>>
>>>>>>>  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>>>>>>>  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
>>>>>>> @@ -706,9 +743,12 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
>>>>>>>  				   u32 pcie_index, u32 pcie_data,
>>>>>>>  				   u32 reg_addr, u64 reg_data)
>>>>>>>  {
>>>>>>> -	unsigned long flags;
>>>>>>>  	void __iomem *pcie_index_offset;
>>>>>>>  	void __iomem *pcie_data_offset;
>>>>>>> +	unsigned long flags;
>>>>>>> +
>>>>>>> +	if (amdgpu_device_skip_hw_access(adev))
>>>>>>> +		return;
>>>>>>>
>>>>>>>  	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
>>>>>>>  	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
>>>>>>> --
>>>>>>> 2.25.1
>>>>>>>


end of thread, other threads:[~2021-03-10 13:41 UTC | newest]

Thread overview: 16+ messages
2021-03-08 18:06 [PATCH] drm/amdgpu: capture invalid hardware access v2 Christian König
2021-03-09  2:03 ` Li, Dennis
2021-03-09 12:56   ` Christian König
2021-03-09 16:40     ` Andrey Grodzovsky
2021-03-09 17:47       ` Christian König
2021-03-09 18:26         ` Andrey Grodzovsky
2021-03-09 18:51           ` Christian König
2021-03-09 19:13             ` Andrey Grodzovsky
2021-03-09 21:13               ` Daniel Vetter
2021-03-10  2:59           ` Li, Dennis
2021-03-10  5:42             ` Andrey Grodzovsky
2021-03-10  6:40               ` Li, Dennis
2021-03-10 13:03                 ` Christian König
2021-03-10 13:19                   ` Li, Dennis
2021-03-10 13:41                     ` Christian König
2021-03-10  3:12     ` Li, Dennis
