* [PATCH] drm/amdgpu: protect ring overrun
@ 2020-04-23 9:06 Yintian Tao
2020-04-23 12:21 ` Christian König
0 siblings, 1 reply; 5+ messages in thread
From: Yintian Tao @ 2020-04-23 9:06 UTC (permalink / raw)
To: christian.koenig, monk.liu, shaoyun.liu; +Cc: amd-gfx, Yintian Tao
Wait for the oldest sequence on the ring
to be signaled in order to make sure there
will be no command overrun.
v2: fix coding stype and remove abs operation
Signed-off-by: Yintian Tao <yttao@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++++++++++++++++++----
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++++++-
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 -
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 -
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++++++++++---
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +++++++-
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++++-
9 files changed, 61 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7531527067df..397bd5fa77cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
* Used For polling fence.
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout)
{
uint32_t seq;
+ signed long r = 0;
if (!s)
return -EINVAL;
seq = ++ring->fence_drv.sync_seq;
+ r = amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ timeout);
+ if (r < 1)
+ return -ETIMEDOUT;
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..0103acc57474 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_device_wb_get(adev, ®_val_offs)) {
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n");
- goto failed_kiq_read;
+ goto failed_unlock;
}
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
amdgpu_device_wb_free(adev, reg_val_offs);
return value;
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
+ if (reg_val_offs)
+ amdgpu_device_wb_free(adev, reg_val_offs);
pr_err("failed to read reg:%x\n", reg);
return ~0;
}
@@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
return;
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_write:
pr_err("failed to write reg:%x\n", reg);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 137d3d2b46e8..be218754629a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -118,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
unsigned flags);
-int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
+int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
+ uint32_t timeout);
bool amdgpu_fence_process(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8c10084f44ef..cbbb8d02535a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -60,7 +60,10 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -82,6 +85,9 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
return;
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq:
pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8a579ce8757d..2b6d7687f4cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7605,7 +7605,6 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 6c56cedce4c3..6ae78b9e9551 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6393,7 +6393,6 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_kiq *kiq = &adev->gfx.kiq;
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
amdgpu_ring_write(ring, 0 | /* src: register*/
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 09aa5f509bd2..be05ab221b16 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4054,9 +4054,8 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
spin_lock_irqsave(&kiq->ring_lock, flags);
if (amdgpu_device_wb_get(adev, ®_val_offs)) {
- spin_unlock_irqrestore(&kiq->ring_lock, flags);
pr_err("critical bug! too many kiq readers\n");
- goto failed_kiq_read;
+ goto failed_unlock;
}
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
@@ -4070,7 +4069,10 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
reg_val_offs * 4));
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -4102,7 +4104,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
amdgpu_device_wb_free(adev, reg_val_offs);
return value;
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
failed_kiq_read:
+ if (reg_val_offs)
+ amdgpu_device_wb_free(adev, reg_val_offs);
pr_err("failed to read gpu clock\n");
return ~0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 30b75d79efdb..b120f9160f13 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -427,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
kiq->pmf->kiq_invalidate_tlbs(ring,
pasid, flush_type, all_hub);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&kiq->ring_lock);
+ return -ETIME;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index fecdbc471983..0a6026308343 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -621,7 +621,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
pasid, 2, all_hub);
kiq->pmf->kiq_invalidate_tlbs(ring,
pasid, flush_type, all_hub);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&kiq->ring_lock);
+ return -ETIME;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu: protect ring overrun
2020-04-23 9:06 [PATCH] drm/amdgpu: protect ring overrun Yintian Tao
@ 2020-04-23 12:21 ` Christian König
2020-04-23 12:29 ` Tao, Yintian
0 siblings, 1 reply; 5+ messages in thread
From: Christian König @ 2020-04-23 12:21 UTC (permalink / raw)
To: Yintian Tao, christian.koenig, monk.liu, shaoyun.liu; +Cc: amd-gfx
Am 23.04.20 um 11:06 schrieb Yintian Tao:
> Wait for the oldest sequence on the ring
> to be signaled in order to make sure there
> will be no command overrun.
>
> v2: fix coding stype and remove abs operation
One nit pick below, with that fixed the patch is Reviewed-by: Christian
König <christian.koenig@amd.com>
>
> Signed-off-by: Yintian Tao <yttao@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++++++++++++++++++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++++++-
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 -
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 -
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++++++++++---
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +++++++-
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++++-
> 9 files changed, 61 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 7531527067df..397bd5fa77cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
> * Used For polling fence.
> * Returns 0 on success, -ENOMEM on failure.
> */
> -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
> + uint32_t timeout)
> {
> uint32_t seq;
> + signed long r = 0;
Please drop the initialization of r here. That is usually seen as rather
bad style because it prevents the compiler from raising an warning when
this really isn't initialized.
Regards,
Christian.
>
> if (!s)
> return -EINVAL;
>
> seq = ++ring->fence_drv.sync_seq;
> + r = amdgpu_fence_wait_polling(ring,
> + seq - ring->fence_drv.num_fences_mask,
> + timeout);
> + if (r < 1)
> + return -ETIMEDOUT;
> +
> amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
> seq, 0);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index a721b0e0ff69..0103acc57474 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>
> spin_lock_irqsave(&kiq->ring_lock, flags);
> if (amdgpu_device_wb_get(adev, ®_val_offs)) {
> - spin_unlock_irqrestore(&kiq->ring_lock, flags);
> pr_err("critical bug! too many kiq readers\n");
> - goto failed_kiq_read;
> + goto failed_unlock;
> }
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> @@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
> amdgpu_device_wb_free(adev, reg_val_offs);
> return value;
>
> +failed_undo:
> + amdgpu_ring_undo(ring);
> +failed_unlock:
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> failed_kiq_read:
> + if (reg_val_offs)
> + amdgpu_device_wb_free(adev, reg_val_offs);
> pr_err("failed to read reg:%x\n", reg);
> return ~0;
> }
> @@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
> spin_lock_irqsave(&kiq->ring_lock, flags);
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_wreg(ring, reg, v);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> @@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
>
> return;
>
> +failed_undo:
> + amdgpu_ring_undo(ring);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> failed_kiq_write:
> pr_err("failed to write reg:%x\n", reg);
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 137d3d2b46e8..be218754629a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -118,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
> void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
> int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
> unsigned flags);
> -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
> + uint32_t timeout);
> bool amdgpu_fence_process(struct amdgpu_ring *ring);
> int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
> signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 8c10084f44ef..cbbb8d02535a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -60,7 +60,10 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
> ref, mask);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> @@ -82,6 +85,9 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
>
> return;
>
> +failed_undo:
> + amdgpu_ring_undo(ring);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> failed_kiq:
> pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 8a579ce8757d..2b6d7687f4cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -7605,7 +7605,6 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
> uint32_t reg_val_offs)
> {
> struct amdgpu_device *adev = ring->adev;
> - struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>
> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
> amdgpu_ring_write(ring, 0 | /* src: register*/
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 6c56cedce4c3..6ae78b9e9551 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6393,7 +6393,6 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
> uint32_t reg_val_offs)
> {
> struct amdgpu_device *adev = ring->adev;
> - struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>
> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
> amdgpu_ring_write(ring, 0 | /* src: register*/
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 09aa5f509bd2..be05ab221b16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -4054,9 +4054,8 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
>
> spin_lock_irqsave(&kiq->ring_lock, flags);
> if (amdgpu_device_wb_get(adev, ®_val_offs)) {
> - spin_unlock_irqrestore(&kiq->ring_lock, flags);
> pr_err("critical bug! too many kiq readers\n");
> - goto failed_kiq_read;
> + goto failed_unlock;
> }
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
> @@ -4070,7 +4069,10 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
> reg_val_offs * 4));
> amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
> reg_val_offs * 4));
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> @@ -4102,7 +4104,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
> amdgpu_device_wb_free(adev, reg_val_offs);
> return value;
>
> +failed_undo:
> + amdgpu_ring_undo(ring);
> +failed_unlock:
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> failed_kiq_read:
> + if (reg_val_offs)
> + amdgpu_device_wb_free(adev, reg_val_offs);
> pr_err("failed to read gpu clock\n");
> return ~0;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 30b75d79efdb..b120f9160f13 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -427,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
> kiq->pmf->kiq_invalidate_tlbs(ring,
> pasid, flush_type, all_hub);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + spin_unlock(&kiq->ring_lock);
> + return -ETIME;
> + }
> +
> amdgpu_ring_commit(ring);
> spin_unlock(&adev->gfx.kiq.ring_lock);
> r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index fecdbc471983..0a6026308343 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -621,7 +621,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> pasid, 2, all_hub);
> kiq->pmf->kiq_invalidate_tlbs(ring,
> pasid, flush_type, all_hub);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + spin_unlock(&kiq->ring_lock);
> + return -ETIME;
> + }
> +
> amdgpu_ring_commit(ring);
> spin_unlock(&adev->gfx.kiq.ring_lock);
> r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH] drm/amdgpu: protect ring overrun
2020-04-23 12:21 ` Christian König
@ 2020-04-23 12:29 ` Tao, Yintian
0 siblings, 0 replies; 5+ messages in thread
From: Tao, Yintian @ 2020-04-23 12:29 UTC (permalink / raw)
To: Koenig, Christian, Liu, Monk, Liu, Shaoyun; +Cc: amd-gfx
Hi Christian
Thanks. I will remove the initialization of r.
Best Regards
Yintian Tao
-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com>
Sent: 2020年4月23日 20:22
To: Tao, Yintian <Yintian.Tao@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Liu, Shaoyun <Shaoyun.Liu@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu: protect ring overrun
Am 23.04.20 um 11:06 schrieb Yintian Tao:
> Wait for the oldest sequence on the ring to be signaled in order to
> make sure there will be no command overrun.
>
> v2: fix coding stype and remove abs operation
One nit pick below, with that fixed the patch is Reviewed-by: Christian König <christian.koenig@amd.com>
>
> Signed-off-by: Yintian Tao <yttao@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++++++++++++++++++----
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++++++-
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 1 -
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 -
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++++++++++---
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +++++++-
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++++-
> 9 files changed, 61 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 7531527067df..397bd5fa77cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f,
> * Used For polling fence.
> * Returns 0 on success, -ENOMEM on failure.
> */
> -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
> + uint32_t timeout)
> {
> uint32_t seq;
> + signed long r = 0;
Please drop the initialization of r here. That is usually seen as rather bad style because it prevents the compiler from raising an warning when this really isn't initialized.
Regards,
Christian.
>
> if (!s)
> return -EINVAL;
>
> seq = ++ring->fence_drv.sync_seq;
> + r = amdgpu_fence_wait_polling(ring,
> + seq - ring->fence_drv.num_fences_mask,
> + timeout);
> + if (r < 1)
> + return -ETIMEDOUT;
> +
> amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
> seq, 0);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index a721b0e0ff69..0103acc57474 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device
> *adev, uint32_t reg)
>
> spin_lock_irqsave(&kiq->ring_lock, flags);
> if (amdgpu_device_wb_get(adev, ®_val_offs)) {
> - spin_unlock_irqrestore(&kiq->ring_lock, flags);
> pr_err("critical bug! too many kiq readers\n");
> - goto failed_kiq_read;
> + goto failed_unlock;
> }
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> @@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
> amdgpu_device_wb_free(adev, reg_val_offs);
> return value;
>
> +failed_undo:
> + amdgpu_ring_undo(ring);
> +failed_unlock:
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> failed_kiq_read:
> + if (reg_val_offs)
> + amdgpu_device_wb_free(adev, reg_val_offs);
> pr_err("failed to read reg:%x\n", reg);
> return ~0;
> }
> @@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
> spin_lock_irqsave(&kiq->ring_lock, flags);
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_wreg(ring, reg, v);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> @@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev,
> uint32_t reg, uint32_t v)
>
> return;
>
> +failed_undo:
> + amdgpu_ring_undo(ring);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> failed_kiq_write:
> pr_err("failed to write reg:%x\n", reg);
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 137d3d2b46e8..be218754629a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -118,7 +118,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
> void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
> int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence,
> unsigned flags);
> -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s,
> + uint32_t timeout);
> bool amdgpu_fence_process(struct amdgpu_ring *ring);
> int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
> signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, diff
> --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 8c10084f44ef..cbbb8d02535a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -60,7 +60,10 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
> ref, mask);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> @@ -82,6 +85,9 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct
> amdgpu_device *adev,
>
> return;
>
> +failed_undo:
> + amdgpu_ring_undo(ring);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> failed_kiq:
> pr_err("failed to write reg %x wait reg %x\n", reg0, reg1);
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 8a579ce8757d..2b6d7687f4cb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -7605,7 +7605,6 @@ static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
> uint32_t reg_val_offs)
> {
> struct amdgpu_device *adev = ring->adev;
> - struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>
> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
> amdgpu_ring_write(ring, 0 | /* src: register*/
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 6c56cedce4c3..6ae78b9e9551 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6393,7 +6393,6 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
> uint32_t reg_val_offs)
> {
> struct amdgpu_device *adev = ring->adev;
> - struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>
> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
> amdgpu_ring_write(ring, 0 | /* src: register*/
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 09aa5f509bd2..be05ab221b16 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -4054,9 +4054,8 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct
> amdgpu_device *adev)
>
> spin_lock_irqsave(&kiq->ring_lock, flags);
> if (amdgpu_device_wb_get(adev, ®_val_offs)) {
> - spin_unlock_irqrestore(&kiq->ring_lock, flags);
> pr_err("critical bug! too many kiq readers\n");
> - goto failed_kiq_read;
> + goto failed_unlock;
> }
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); @@ -4070,7
> +4069,10 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
> reg_val_offs * 4));
> amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
> reg_val_offs * 4));
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r)
> + goto failed_undo;
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> @@ -4102,7 +4104,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
> amdgpu_device_wb_free(adev, reg_val_offs);
> return value;
>
> +failed_undo:
> + amdgpu_ring_undo(ring);
> +failed_unlock:
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> failed_kiq_read:
> + if (reg_val_offs)
> + amdgpu_device_wb_free(adev, reg_val_offs);
> pr_err("failed to read gpu clock\n");
> return ~0;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 30b75d79efdb..b120f9160f13 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -427,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
> kiq->pmf->kiq_invalidate_tlbs(ring,
> pasid, flush_type, all_hub);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + spin_unlock(&kiq->ring_lock);
> + return -ETIME;
> + }
> +
> amdgpu_ring_commit(ring);
> spin_unlock(&adev->gfx.kiq.ring_lock);
> r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); diff
> --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index fecdbc471983..0a6026308343 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -621,7 +621,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> pasid, 2, all_hub);
> kiq->pmf->kiq_invalidate_tlbs(ring,
> pasid, flush_type, all_hub);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + spin_unlock(&kiq->ring_lock);
> + return -ETIME;
> + }
> +
> amdgpu_ring_commit(ring);
> spin_unlock(&adev->gfx.kiq.ring_lock);
> r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu: protect ring overrun
2020-04-23 4:22 Yintian Tao
@ 2020-04-23 7:38 ` Christian König
0 siblings, 0 replies; 5+ messages in thread
From: Christian König @ 2020-04-23 7:38 UTC (permalink / raw)
To: Yintian Tao, monk.liu, shaoyun.liu; +Cc: amd-gfx
Am 23.04.20 um 06:22 schrieb Yintian Tao:
> Wait for the oldest sequence on the ring
> to be signaled in order to make sure there
> will be no command overrun.
One technical problem and a few style suggestions below. Apart from that
looks good to me.
>
> Signed-off-by: Yintian Tao <yttao@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 7 +++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 17 +++++++++++++++--
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++++++-
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 ++++++++-
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +++++++-
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++++-
> 6 files changed, 51 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 7531527067df..5462ea83d8b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -200,6 +200,13 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
> return -EINVAL;
>
> seq = ++ring->fence_drv.sync_seq;
> + if ((abs(seq - ring->fence_drv.num_fences_mask) >
> + ring->fence_drv.num_fences_mask) &&
That is a rather bad idea and won't work. The sequence is a 32bit value
and supposed to wrap around. Just dropping the extra check should do it
should work.
> + (amdgpu_fence_wait_polling(ring,
> + seq - ring->fence_drv.num_fences_mask,
> + MAX_KIQ_REG_WAIT) < 1))
Maybe we should make the timeout a parameter here.
And it is usually better to use the style like this:
r = amdgpu_fence....
if (r < 1)
.....
> + return -ETIME;
We usually use -ETIMEDOUT because this is not really bound to a specific
timer.
> +
> amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
> seq, 0);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index a721b0e0ff69..7087333681f6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -681,7 +681,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
> }
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + amdgpu_device_wb_free(adev, reg_val_offs);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> + goto failed_kiq_read;
> + }
> +
If we already have goto style error handling we should probably just add
a new label to handle it.
Same for the other cases below where you already have a goto.
Regards,
Christian.
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> @@ -730,7 +737,13 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
> spin_lock_irqsave(&kiq->ring_lock, flags);
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_wreg(ring, reg, v);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> + goto failed_kiq_write;
> + }
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index 8c10084f44ef..12d181ac7e78 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -60,7 +60,13 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
> ref, mask);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> + goto failed_kiq;
> + }
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5b1549f167b0..650b7a67d3bc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -4068,7 +4068,14 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
> reg_val_offs * 4));
> amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
> reg_val_offs * 4));
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + amdgpu_device_wb_free(adev, reg_val_offs);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> + goto failed_kiq_read;
> + }
> +
> amdgpu_ring_commit(ring);
> spin_unlock_irqrestore(&kiq->ring_lock, flags);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 30b75d79efdb..71430f2a2374 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -427,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
> kiq->pmf->kiq_invalidate_tlbs(ring,
> pasid, flush_type, all_hub);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + spin_unlock(&kiq->ring_lock);
> + return -ETIME;
> + }
> +
> amdgpu_ring_commit(ring);
> spin_unlock(&adev->gfx.kiq.ring_lock);
> r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index fecdbc471983..a42a95675e2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -621,7 +621,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> pasid, 2, all_hub);
> kiq->pmf->kiq_invalidate_tlbs(ring,
> pasid, flush_type, all_hub);
> - amdgpu_fence_emit_polling(ring, &seq);
> + r = amdgpu_fence_emit_polling(ring, &seq);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + spin_unlock(&kiq->ring_lock);
> + return -ETIME;
> + }
> +
> amdgpu_ring_commit(ring);
> spin_unlock(&adev->gfx.kiq.ring_lock);
> r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] drm/amdgpu: protect ring overrun
@ 2020-04-23 4:22 Yintian Tao
2020-04-23 7:38 ` Christian König
0 siblings, 1 reply; 5+ messages in thread
From: Yintian Tao @ 2020-04-23 4:22 UTC (permalink / raw)
To: christian.koenig, monk.liu, shaoyun.liu; +Cc: amd-gfx, Yintian Tao
Wait for the oldest sequence on the ring
to be signaled in order to make sure there
will be no command overrun.
Signed-off-by: Yintian Tao <yttao@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 7 +++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 17 +++++++++++++++--
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++++++-
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 9 ++++++++-
drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +++++++-
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++++-
6 files changed, 51 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7531527067df..5462ea83d8b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -200,6 +200,13 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
return -EINVAL;
seq = ++ring->fence_drv.sync_seq;
+ if ((abs(seq - ring->fence_drv.num_fences_mask) >
+ ring->fence_drv.num_fences_mask) &&
+ (amdgpu_fence_wait_polling(ring,
+ seq - ring->fence_drv.num_fences_mask,
+ MAX_KIQ_REG_WAIT) < 1))
+ return -ETIME;
+
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a721b0e0ff69..7087333681f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -681,7 +681,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
}
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ goto failed_kiq_read;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
@@ -730,7 +737,13 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
spin_lock_irqsave(&kiq->ring_lock, flags);
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_wreg(ring, reg, v);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ goto failed_kiq_write;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 8c10084f44ef..12d181ac7e78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -60,7 +60,13 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
amdgpu_ring_alloc(ring, 32);
amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1,
ref, mask);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ goto failed_kiq;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5b1549f167b0..650b7a67d3bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4068,7 +4068,14 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
reg_val_offs * 4));
amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
reg_val_offs * 4));
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+ goto failed_kiq_read;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&kiq->ring_lock, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 30b75d79efdb..71430f2a2374 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -427,7 +427,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
kiq->pmf->kiq_invalidate_tlbs(ring,
pasid, flush_type, all_hub);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&kiq->ring_lock);
+ return -ETIME;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index fecdbc471983..a42a95675e2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -621,7 +621,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
pasid, 2, all_hub);
kiq->pmf->kiq_invalidate_tlbs(ring,
pasid, flush_type, all_hub);
- amdgpu_fence_emit_polling(ring, &seq);
+ r = amdgpu_fence_emit_polling(ring, &seq);
+ if (r) {
+ amdgpu_ring_undo(ring);
+ spin_unlock(&kiq->ring_lock);
+ return -ETIME;
+ }
+
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2020-04-23 12:29 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-23 9:06 [PATCH] drm/amdgpu: protect ring overrun Yintian Tao
2020-04-23 12:21 ` Christian König
2020-04-23 12:29 ` Tao, Yintian
-- strict thread matches above, loose matches on Subject: below --
2020-04-23 4:22 Yintian Tao
2020-04-23 7:38 ` Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.