All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu/gfx10: fix race condition for kiq
@ 2020-07-10  5:44 Jack Xiao
  2020-07-10  5:44 ` [PATCH 2/2] drm/amdgpu: fix preemption unit test Jack Xiao
  2020-07-10  7:35 ` [PATCH 1/2] drm/amdgpu/gfx10: fix race condition for kiq Christian König
  0 siblings, 2 replies; 4+ messages in thread
From: Jack Xiao @ 2020-07-10  5:44 UTC (permalink / raw)
  To: amd-gfx, Alexander.Deucher, Hawking.Zhang; +Cc: Jack Xiao

During preemption test for gfx10, it uses kiq to trigger
gfx preemption, which would result in race condition
with flushing TLB for kiq.

Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index a6170a346b39..ddf6d8128753 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7836,12 +7836,17 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	unsigned long flags;
 
 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
 		return -EINVAL;
 
-	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size))
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
 		return -ENOMEM;
+	}
 
 	/* assert preemption condition */
 	amdgpu_ring_set_preempt_cond_exec(ring, false);
@@ -7852,6 +7857,8 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
 				   ++ring->trail_seq);
 	amdgpu_ring_commit(kiq_ring);
 
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
 	/* poll the trailing fence */
 	for (i = 0; i < adev->usec_timeout; i++) {
 		if (ring->trail_seq ==
-- 
2.26.2

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] drm/amdgpu: fix preemption unit test
  2020-07-10  5:44 [PATCH 1/2] drm/amdgpu/gfx10: fix race condition for kiq Jack Xiao
@ 2020-07-10  5:44 ` Jack Xiao
  2020-07-10  6:42   ` Zhang, Hawking
  2020-07-10  7:35 ` [PATCH 1/2] drm/amdgpu/gfx10: fix race condition for kiq Christian König
  1 sibling, 1 reply; 4+ messages in thread
From: Jack Xiao @ 2020-07-10  5:44 UTC (permalink / raw)
  To: amd-gfx, Alexander.Deucher, Hawking.Zhang; +Cc: Jack Xiao

Remove signaled jobs from job list and ensure the
job was indeed preempted.

Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index aeada7c9fbea..bd5061fbe031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1343,27 +1343,37 @@ static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
 static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
 {
 	struct amdgpu_job *job;
-	struct drm_sched_job *s_job;
+	struct drm_sched_job *s_job, *tmp;
 	uint32_t preempt_seq;
 	struct dma_fence *fence, **ptr;
 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
 	struct drm_gpu_scheduler *sched = &ring->sched;
+	bool preempted = true;
 
 	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
 		return;
 
 	preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
-	if (preempt_seq <= atomic_read(&drv->last_seq))
-		return;
+	if (preempt_seq <= atomic_read(&drv->last_seq)) {
+		preempted = false;
+		goto no_preempt;
+	}
 
 	preempt_seq &= drv->num_fences_mask;
 	ptr = &drv->fences[preempt_seq];
 	fence = rcu_dereference_protected(*ptr, 1);
 
+no_preempt:
 	spin_lock(&sched->job_list_lock);
-	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+		if (dma_fence_is_signaled(&s_job->s_fence->finished)) {
+			/* remove job from ring_mirror_list */
+			list_del_init(&s_job->node);
+			sched->ops->free_job(s_job);
+			continue;
+		}
 		job = to_amdgpu_job(s_job);
-		if (job->fence == fence)
+		if (preempted && job->fence == fence)
 			/* mark the job as preempted */
 			job->preemption_status |= AMDGPU_IB_PREEMPTED;
 	}
-- 
2.26.2

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH 2/2] drm/amdgpu: fix preemption unit test
  2020-07-10  5:44 ` [PATCH 2/2] drm/amdgpu: fix preemption unit test Jack Xiao
@ 2020-07-10  6:42   ` Zhang, Hawking
  0 siblings, 0 replies; 4+ messages in thread
From: Zhang, Hawking @ 2020-07-10  6:42 UTC (permalink / raw)
  To: Xiao, Jack, amd-gfx, Deucher, Alexander

[AMD Public Use]

Series is

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>

Regards,
Hawking
-----Original Message-----
From: Xiao, Jack <Jack.Xiao@amd.com> 
Sent: Friday, July 10, 2020 13:45
To: amd-gfx@lists.freedesktop.org; Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com>
Cc: Xiao, Jack <Jack.Xiao@amd.com>
Subject: [PATCH 2/2] drm/amdgpu: fix preemption unit test

Remove signaled jobs from job list and ensure the job was indeed preempted.

Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index aeada7c9fbea..bd5061fbe031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1343,27 +1343,37 @@ static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)  static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)  {
 	struct amdgpu_job *job;
-	struct drm_sched_job *s_job;
+	struct drm_sched_job *s_job, *tmp;
 	uint32_t preempt_seq;
 	struct dma_fence *fence, **ptr;
 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
 	struct drm_gpu_scheduler *sched = &ring->sched;
+	bool preempted = true;
 
 	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
 		return;
 
 	preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
-	if (preempt_seq <= atomic_read(&drv->last_seq))
-		return;
+	if (preempt_seq <= atomic_read(&drv->last_seq)) {
+		preempted = false;
+		goto no_preempt;
+	}
 
 	preempt_seq &= drv->num_fences_mask;
 	ptr = &drv->fences[preempt_seq];
 	fence = rcu_dereference_protected(*ptr, 1);
 
+no_preempt:
 	spin_lock(&sched->job_list_lock);
-	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+		if (dma_fence_is_signaled(&s_job->s_fence->finished)) {
+			/* remove job from ring_mirror_list */
+			list_del_init(&s_job->node);
+			sched->ops->free_job(s_job);
+			continue;
+		}
 		job = to_amdgpu_job(s_job);
-		if (job->fence == fence)
+		if (preempted && job->fence == fence)
 			/* mark the job as preempted */
 			job->preemption_status |= AMDGPU_IB_PREEMPTED;
 	}
--
2.26.2
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu/gfx10: fix race condition for kiq
  2020-07-10  5:44 [PATCH 1/2] drm/amdgpu/gfx10: fix race condition for kiq Jack Xiao
  2020-07-10  5:44 ` [PATCH 2/2] drm/amdgpu: fix preemption unit test Jack Xiao
@ 2020-07-10  7:35 ` Christian König
  1 sibling, 0 replies; 4+ messages in thread
From: Christian König @ 2020-07-10  7:35 UTC (permalink / raw)
  To: Jack Xiao, amd-gfx, Alexander.Deucher, Hawking.Zhang

Am 10.07.20 um 07:44 schrieb Jack Xiao:
> During preemption test for gfx10, it uses kiq to trigger
> gfx preemption, which would result in race condition
> with flushing TLB for kiq.
>
> Signed-off-by: Jack Xiao <Jack.Xiao@amd.com>

Acked-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 9 ++++++++-
>   1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index a6170a346b39..ddf6d8128753 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -7836,12 +7836,17 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
>   	struct amdgpu_device *adev = ring->adev;
>   	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>   	struct amdgpu_ring *kiq_ring = &kiq->ring;
> +	unsigned long flags;
>   
>   	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
>   		return -EINVAL;
>   
> -	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size))
> +	spin_lock_irqsave(&kiq->ring_lock, flags);
> +
> +	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
> +		spin_unlock_irqrestore(&kiq->ring_lock, flags);
>   		return -ENOMEM;
> +	}
>   
>   	/* assert preemption condition */
>   	amdgpu_ring_set_preempt_cond_exec(ring, false);
> @@ -7852,6 +7857,8 @@ static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
>   				   ++ring->trail_seq);
>   	amdgpu_ring_commit(kiq_ring);
>   
> +	spin_unlock_irqrestore(&kiq->ring_lock, flags);
> +
>   	/* poll the trailing fence */
>   	for (i = 0; i < adev->usec_timeout; i++) {
>   		if (ring->trail_seq ==

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-07-10  7:35 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-10  5:44 [PATCH 1/2] drm/amdgpu/gfx10: fix race condition for kiq Jack Xiao
2020-07-10  5:44 ` [PATCH 2/2] drm/amdgpu: fix preemption unit test Jack Xiao
2020-07-10  6:42   ` Zhang, Hawking
2020-07-10  7:35 ` [PATCH 1/2] drm/amdgpu/gfx10: fix race condition for kiq Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.