All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] drm/scheduler: rework job destruction
@ 2019-04-11 16:03 Andrey Grodzovsky
  2019-04-11 16:03 ` [PATCH 2/4] drm/sched: Keep s_fence->parent pointer Andrey Grodzovsky
       [not found] ` <1554998624-25799-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 2 replies; 12+ messages in thread
From: Andrey Grodzovsky @ 2019-04-11 16:03 UTC (permalink / raw)
  To: dri-devel, amd-gfx, eric, etnaviv, ckoenig.leichtzumerken
  Cc: Nicholas.Kazlauskas, Christian König

From: Christian König <christian.koenig@amd.com>

We now destroy finished jobs from the worker thread to make sure that
we never destroy a job currently in timeout processing.
By this we avoid holding the lock around the ring mirror list in
drm_sched_stop, which should solve a deadlock reported by a user.

v2: Remove unused variable.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=109692

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  17 ++--
 drivers/gpu/drm/etnaviv/etnaviv_dump.c     |   4 -
 drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   9 +-
 drivers/gpu/drm/scheduler/sched_main.c     | 138 +++++++++++++++++------------
 drivers/gpu/drm/v3d/v3d_sched.c            |   9 +-
 include/drm/gpu_scheduler.h                |   6 +-
 6 files changed, 108 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fcb3d95..aabd043 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3339,15 +3339,23 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		drm_sched_stop(&ring->sched);
+		drm_sched_stop(&ring->sched, &job->base);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
 
-	if(job)
+	if(job) {
 		drm_sched_increase_karma(&job->base);
 
+		/*
+		 * Guilty job did complete and hence needs to be manually removed
+		 * See drm_sched_stop doc.
+		 */
+		if (list_empty(&job->base.node))
+			job->base.sched->ops->free_job(&job->base);
+	}
+
 
 
 	if (!amdgpu_sriov_vf(adev)) {
@@ -3487,8 +3495,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 	return r;
 }
 
-static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
-					  struct amdgpu_job *job)
+static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
 {
 	int i;
 
@@ -3628,7 +3635,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 	/* Post ASIC reset for all devs .*/
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-		amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
+		amdgpu_device_post_asic_reset(tmp_adev);
 
 		if (r) {
 			/* bad news, how to tell it to userspace ? */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 3fbb485..8434715 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -135,13 +135,11 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 		    mmu_size + gpu->buffer.size;
 
 	/* Add in the active command buffers */
-	spin_lock_irqsave(&gpu->sched.job_list_lock, flags);
 	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
 		submit = to_etnaviv_submit(s_job);
 		file_size += submit->cmdbuf.size;
 		n_obj++;
 	}
-	spin_unlock_irqrestore(&gpu->sched.job_list_lock, flags);
 
 	/* Add in the active buffer objects */
 	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
@@ -183,14 +181,12 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 			      gpu->buffer.size,
 			      etnaviv_cmdbuf_get_va(&gpu->buffer));
 
-	spin_lock_irqsave(&gpu->sched.job_list_lock, flags);
 	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
 		submit = to_etnaviv_submit(s_job);
 		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
 				      submit->cmdbuf.vaddr, submit->cmdbuf.size,
 				      etnaviv_cmdbuf_get_va(&submit->cmdbuf));
 	}
-	spin_unlock_irqrestore(&gpu->sched.job_list_lock, flags);
 
 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 67ae266..8795c19 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -109,11 +109,18 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 	}
 
 	/* block scheduler */
-	drm_sched_stop(&gpu->sched);
+	drm_sched_stop(&gpu->sched, sched_job);
 
 	if(sched_job)
 		drm_sched_increase_karma(sched_job);
 
+	/*
+	 * Guilty job did complete and hence needs to be manually removed
+	 * See drm_sched_stop doc.
+	 */
+	if (list_empty(&sched_job->node))
+		sched_job->sched->ops->free_job(sched_job);
+
 	/* get the GPU back into the init state */
 	etnaviv_core_dump(gpu);
 	etnaviv_gpu_recover_hang(gpu);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 19fc601..05a4540 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -265,32 +265,6 @@ void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
 }
 EXPORT_SYMBOL(drm_sched_resume_timeout);
 
-/* job_finish is called after hw fence signaled
- */
-static void drm_sched_job_finish(struct work_struct *work)
-{
-	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
-						   finish_work);
-	struct drm_gpu_scheduler *sched = s_job->sched;
-	unsigned long flags;
-
-	/*
-	 * Canceling the timeout without removing our job from the ring mirror
-	 * list is safe, as we will only end up in this worker if our jobs
-	 * finished fence has been signaled. So even if some another worker
-	 * manages to find this job as the next job in the list, the fence
-	 * signaled check below will prevent the timeout to be restarted.
-	 */
-	cancel_delayed_work_sync(&sched->work_tdr);
-
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* queue TDR for next job */
-	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-	sched->ops->free_job(s_job);
-}
-
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
@@ -371,23 +345,26 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
  * @sched: scheduler instance
  * @bad: bad scheduler job
  *
+ * Stop the scheduler and also removes and frees all completed jobs.
+ * Note: bad job will not be freed as it might be used later and so it is the
+ * caller's responsibility to release it manually if it's not part of the
+ * mirror list any more.
+ *
  */
-void drm_sched_stop(struct drm_gpu_scheduler *sched)
+void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 {
-	struct drm_sched_job *s_job;
+	struct drm_sched_job *s_job, *tmp;
 	unsigned long flags;
-	struct dma_fence *last_fence =  NULL;
 
 	kthread_park(sched->thread);
 
 	/*
-	 * Verify all the signaled jobs in mirror list are removed from the ring
-	 * by waiting for the latest job to enter the list. This should insure that
-	 * also all the previous jobs that were in flight also already singaled
-	 * and removed from the list.
+	 * Iterate the job list from later to earlier one and either deactivate
+	 * their HW callbacks or remove them from mirror list if they already
+	 * signaled.
+	 * This iteration is thread safe as sched thread is stopped.
 	 */
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
 		if (s_job->s_fence->parent &&
 		    dma_fence_remove_callback(s_job->s_fence->parent,
 					      &s_job->cb)) {
@@ -395,16 +372,30 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched)
 			s_job->s_fence->parent = NULL;
 			atomic_dec(&sched->hw_rq_count);
 		} else {
-			 last_fence = dma_fence_get(&s_job->s_fence->finished);
-			 break;
+			/*
+			 * remove job from ring_mirror_list.
+			 * Locking here is for concurrent resume timeout
+			 */
+			spin_lock_irqsave(&sched->job_list_lock, flags);
+			list_del_init(&s_job->node);
+			spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+			/*
+			 * Wait for job's HW fence callback to finish using s_job
+			 * before releasing it.
+			 *
+			 * Job is still alive so fence refcount at least 1
+			 */
+			dma_fence_wait(&s_job->s_fence->finished, false);
+
+			/*
+			 * We must keep bad job alive for later use during
+			 * recovery by some of the drivers
+			 */
+			if (bad != s_job)
+				sched->ops->free_job(s_job);
 		}
 	}
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-	if (last_fence) {
-		dma_fence_wait(last_fence, false);
-		dma_fence_put(last_fence);
-	}
 }
 
 EXPORT_SYMBOL(drm_sched_stop);
@@ -418,6 +409,7 @@ EXPORT_SYMBOL(drm_sched_stop);
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 {
 	struct drm_sched_job *s_job, *tmp;
+	unsigned long flags;
 	int r;
 
 	if (!full_recovery)
@@ -425,9 +417,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 
 	/*
 	 * Locking the list is not required here as the sched thread is parked
-	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
-	 * flushed all the jobs who were still in mirror list but who already
-	 * signaled and removed them self from the list. Also concurrent
+	 * so no new jobs are being inserted or removed. Also concurrent
 	 * GPU recovers can't run in parallel.
 	 */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
@@ -445,7 +435,9 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 			drm_sched_process_job(NULL, &s_job->cb);
 	}
 
+	spin_lock_irqsave(&sched->job_list_lock, flags);
 	drm_sched_start_timeout(sched);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 unpark:
 	kthread_unpark(sched->thread);
@@ -464,7 +456,6 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
 	uint64_t guilty_context;
 	bool found_guilty = false;
 
-	/*TODO DO we need spinlock here ? */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 
@@ -514,7 +505,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
 		return -ENOMEM;
 	job->id = atomic64_inc_return(&sched->job_id_count);
 
-	INIT_WORK(&job->finish_work, drm_sched_job_finish);
 	INIT_LIST_HEAD(&job->node);
 
 	return 0;
@@ -597,24 +587,53 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
 	struct drm_sched_fence *s_fence = s_job->s_fence;
 	struct drm_gpu_scheduler *sched = s_fence->sched;
-	unsigned long flags;
-
-	cancel_delayed_work(&sched->work_tdr);
 
 	atomic_dec(&sched->hw_rq_count);
 	atomic_dec(&sched->num_jobs);
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* remove job from ring_mirror_list */
-	list_del_init(&s_job->node);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+	trace_drm_sched_process_job(s_fence);
 
 	drm_sched_fence_finished(s_fence);
-
-	trace_drm_sched_process_job(s_fence);
 	wake_up_interruptible(&sched->wake_up_worker);
+}
+
+/**
+ * drm_sched_cleanup_jobs - destroy finished jobs
+ *
+ * @sched: scheduler instance
+ *
+ * Remove all finished jobs from the mirror list and destroy them.
+ */
+static void drm_sched_cleanup_jobs(struct drm_gpu_scheduler *sched)
+{
+	unsigned long flags;
+
+	/* Don't destroy jobs while the timeout worker is running */
+	if (!cancel_delayed_work(&sched->work_tdr))
+		return;
+
+
+	while (!list_empty(&sched->ring_mirror_list)) {
+		struct drm_sched_job *job;
+
+		job = list_first_entry(&sched->ring_mirror_list,
+				       struct drm_sched_job, node);
+		if (!dma_fence_is_signaled(&job->s_fence->finished))
+			break;
+
+		spin_lock_irqsave(&sched->job_list_lock, flags);
+		/* remove job from ring_mirror_list */
+		list_del_init(&job->node);
+		spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+		sched->ops->free_job(job);
+	}
+
+	/* queue timeout for next job */
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+	drm_sched_start_timeout(sched);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
-	schedule_work(&s_job->finish_work);
 }
 
 /**
@@ -656,9 +675,10 @@ static int drm_sched_main(void *param)
 		struct dma_fence *fence;
 
 		wait_event_interruptible(sched->wake_up_worker,
+					 (drm_sched_cleanup_jobs(sched),
 					 (!drm_sched_blocked(sched) &&
 					  (entity = drm_sched_select_entity(sched))) ||
-					 kthread_should_stop());
+					 kthread_should_stop()));
 
 		if (!entity)
 			continue;
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index ce7c737b..8efb091 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -232,11 +232,18 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
 
 	/* block scheduler */
 	for (q = 0; q < V3D_MAX_QUEUES; q++)
-		drm_sched_stop(&v3d->queue[q].sched);
+		drm_sched_stop(&v3d->queue[q].sched, sched_job);
 
 	if(sched_job)
 		drm_sched_increase_karma(sched_job);
 
+	/*
+	 * Guilty job did complete and hence needs to be manually removed
+	 * See drm_sched_stop doc.
+	 */
+	if (list_empty(&sched_job->node))
+		sched_job->sched->ops->free_job(sched_job);
+
 	/* get the GPU back into the init state */
 	v3d_reset(v3d);
 
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 0daca4d..9ee0f27 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -167,9 +167,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  * @sched: the scheduler instance on which this job is scheduled.
  * @s_fence: contains the fences for the scheduling of job.
  * @finish_cb: the callback for the finished fence.
- * @finish_work: schedules the function @drm_sched_job_finish once the job has
- *               finished to remove the job from the
- *               @drm_gpu_scheduler.ring_mirror_list.
  * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
  * @id: a unique id assigned to each job scheduled on the scheduler.
  * @karma: increment on every hang caused by this job. If this exceeds the hang
@@ -188,7 +185,6 @@ struct drm_sched_job {
 	struct drm_gpu_scheduler	*sched;
 	struct drm_sched_fence		*s_fence;
 	struct dma_fence_cb		finish_cb;
-	struct work_struct		finish_work;
 	struct list_head		node;
 	uint64_t			id;
 	atomic_t			karma;
@@ -296,7 +292,7 @@ int drm_sched_job_init(struct drm_sched_job *job,
 		       void *owner);
 void drm_sched_job_cleanup(struct drm_sched_job *job);
 void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
-void drm_sched_stop(struct drm_gpu_scheduler *sched);
+void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery);
 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
 void drm_sched_increase_karma(struct drm_sched_job *bad);
-- 
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/4] drm/sched: Keep s_fence->parent pointer
  2019-04-11 16:03 [PATCH 1/4] drm/scheduler: rework job destruction Andrey Grodzovsky
@ 2019-04-11 16:03 ` Andrey Grodzovsky
       [not found] ` <1554998624-25799-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 0 replies; 12+ messages in thread
From: Andrey Grodzovsky @ 2019-04-11 16:03 UTC (permalink / raw)
  To: dri-devel, amd-gfx, eric, etnaviv, ckoenig.leichtzumerken
  Cc: Nicholas.Kazlauskas

For the driver's later reference, to see if the fence is signaled.

v2: Move parent fence put to resubmit jobs.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/scheduler/sched_main.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 05a4540..89d1f1b 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -368,8 +368,6 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 		if (s_job->s_fence->parent &&
 		    dma_fence_remove_callback(s_job->s_fence->parent,
 					      &s_job->cb)) {
-			dma_fence_put(s_job->s_fence->parent);
-			s_job->s_fence->parent = NULL;
 			atomic_dec(&sched->hw_rq_count);
 		} else {
 			/*
@@ -396,6 +394,14 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 				sched->ops->free_job(s_job);
 		}
 	}
+
+	/*
+	 * Stop pending timer in flight as we rearm it in  drm_sched_start. This
+	 * avoids the pending timeout work in progress to fire right away after
+	 * this TDR finished and before the newly restarted jobs had a
+	 * chance to complete.
+	 */
+	cancel_delayed_work(&sched->work_tdr);
 }
 
 EXPORT_SYMBOL(drm_sched_stop);
@@ -467,6 +473,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
 		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
 			dma_fence_set_error(&s_fence->finished, -ECANCELED);
 
+		dma_fence_put(s_job->s_fence->parent);
 		s_job->s_fence->parent = sched->ops->run_job(s_job);
 		atomic_inc(&sched->hw_rq_count);
 	}
-- 
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/4] drm/amdgpu: Avoid HW reset if guilty job already signaled.
       [not found] ` <1554998624-25799-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2019-04-11 16:03   ` Andrey Grodzovsky
  2019-04-12  7:39     ` Christian König
  2019-04-11 16:03   ` [PATCH 4/4] drm/amd/display: Restore deleted patch to resolve reset deadlock Andrey Grodzovsky
  1 sibling, 1 reply; 12+ messages in thread
From: Andrey Grodzovsky @ 2019-04-11 16:03 UTC (permalink / raw)
  To: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	eric-WhKQ6XTQaPysTnJN9+BGXg,
	etnaviv-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Andrey Grodzovsky, Nicholas.Kazlauskas-5C7GfCeVMHo

Also reject TDRs if another one is already running.

v2:
Stop all schedulers across device and entire XGMI hive before
force signaling HW fences.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 125 ++++++++++++++++++++---------
 1 file changed, 88 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index aabd043..ce9c494 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3327,7 +3327,8 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 
 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 					struct amdgpu_job *job,
-					bool *need_full_reset_arg)
+					bool *need_full_reset_arg,
+					bool job_signaled)
 {
 	int i, r = 0;
 	bool need_full_reset  = *need_full_reset_arg;
@@ -3339,8 +3340,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		drm_sched_stop(&ring->sched, &job->base);
-
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
@@ -3358,7 +3357,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 
 
 
-	if (!amdgpu_sriov_vf(adev)) {
+	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
+	if (!amdgpu_sriov_vf(adev) && !job_signaled) {
 
 		if (!need_full_reset)
 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
@@ -3495,7 +3495,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 	return r;
 }
 
-static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
+static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, bool job_signaled)
 {
 	int i;
 
@@ -3505,7 +3505,8 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		if (!adev->asic_reset_res)
+		/* No point to resubmit jobs if we didn't HW reset*/
+		if (!adev->asic_reset_res && !job_signaled)
 			drm_sched_resubmit_jobs(&ring->sched);
 
 		drm_sched_start(&ring->sched, !adev->asic_reset_res);
@@ -3518,14 +3519,21 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
 	adev->asic_reset_res = 0;
 }
 
-static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
+static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
 {
-	mutex_lock(&adev->lock_reset);
+	if (trylock) {
+		if (!mutex_trylock(&adev->lock_reset))
+			return false;
+	} else
+		mutex_lock(&adev->lock_reset);
+
 	atomic_inc(&adev->gpu_reset_counter);
 	adev->in_gpu_reset = 1;
 	/* Block kfd: SRIOV would do it separately */
 	if (!amdgpu_sriov_vf(adev))
                 amdgpu_amdkfd_pre_reset(adev);
+
+	return true;
 }
 
 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
@@ -3553,38 +3561,43 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job)
 {
-	int r;
+	int r, i;
 	struct amdgpu_hive_info *hive = NULL;
-	bool need_full_reset = false;
 	struct amdgpu_device *tmp_adev = NULL;
 	struct list_head device_list, *device_list_handle =  NULL;
+	bool xgmi_topology_present, need_full_reset, job_signaled;
 
+	need_full_reset = job_signaled = false;
 	INIT_LIST_HEAD(&device_list);
 
 	dev_info(adev->dev, "GPU reset begin!\n");
 
+	hive = amdgpu_get_xgmi_hive(adev, 0);
+	xgmi_topology_present = hive && adev->gmc.xgmi.num_physical_nodes > 1;
+
 	/*
-	 * In case of XGMI hive disallow concurrent resets to be triggered
-	 * by different nodes. No point also since the one node already executing
-	 * reset will also reset all the other nodes in the hive.
+	 * Here we trylock to avoid chain of resets executing from
+	 * either trigger by jobs on different adevs in XGMI hive or jobs on
+	 * different schedulers for same device while this tdr is running.
+	 * We always reset all schedulers for device and all devices for XGMI
+	 * hive so that should take care of them too.
 	 */
-	hive = amdgpu_get_xgmi_hive(adev, 0);
-	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
-	    !mutex_trylock(&hive->reset_lock))
+
+	if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) {
+		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
+			 job->base.id, hive->hive_id);
 		return 0;
+	}
 
 	/* Start with adev pre asic reset first for soft reset check.*/
-	amdgpu_device_lock_adev(adev);
-	r = amdgpu_device_pre_asic_reset(adev,
-					 job,
-					 &need_full_reset);
-	if (r) {
-		/*TODO Should we stop ?*/
-		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
-			  r, adev->ddev->unique);
-		adev->asic_reset_res = r;
+	if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) {
+		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
+					 job->base.id);
+		return 0;
 	}
 
+	need_full_reset = amdgpu_device_ip_need_full_reset(adev);
+
 	/* Build list of devices to reset */
 	if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
 		if (!hive) {
@@ -3603,16 +3616,52 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		device_list_handle = &device_list;
 	}
 
+	/* block all schedulers and reset given job's ring */
+	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+			struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+			if (!ring || !ring->sched.thread)
+				continue;
+
+			drm_sched_stop(&ring->sched, &job->base);
+		}
+	}
+
+	/*
+	 * Must check guilty signal here since after this point all old
+	 * HW fences are force signaled.
+	 *
+	 * job->base holds a reference to parent fence
+	 */
+	if (job && job->base.s_fence->parent &&
+	    dma_fence_is_signaled(job->base.s_fence->parent))
+		job_signaled = true;
+
+
+	/* Guilty job will be freed after this*/
+	r = amdgpu_device_pre_asic_reset(adev,
+					 job,
+					 &need_full_reset,
+					 job_signaled);
+	if (r) {
+		/*TODO Should we stop ?*/
+		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
+			  r, adev->ddev->unique);
+		adev->asic_reset_res = r;
+	}
+
 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 
 		if (tmp_adev == adev)
 			continue;
 
-		amdgpu_device_lock_adev(tmp_adev);
+		amdgpu_device_lock_adev(tmp_adev, false);
 		r = amdgpu_device_pre_asic_reset(tmp_adev,
 						 NULL,
-						 &need_full_reset);
+						 &need_full_reset,
+						 job_signaled);
 		/*TODO Should we stop ?*/
 		if (r) {
 			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
@@ -3623,19 +3672,21 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 	/* Actual ASIC resets if needed.*/
 	/* TODO Implement XGMI hive reset logic for SRIOV */
-	if (amdgpu_sriov_vf(adev)) {
-		r = amdgpu_device_reset_sriov(adev, job ? false : true);
-		if (r)
-			adev->asic_reset_res = r;
-	} else {
-		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
-		if (r && r == -EAGAIN)
-			goto retry;
+	if (!job || !job_signaled) {
+		if (amdgpu_sriov_vf(adev)) {
+			r = amdgpu_device_reset_sriov(adev, job ? false : true);
+			if (r)
+				adev->asic_reset_res = r;
+		} else {
+			r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
+			if (r && r == -EAGAIN)
+				goto retry;
+		}
 	}
 
 	/* Post ASIC reset for all devs .*/
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-		amdgpu_device_post_asic_reset(tmp_adev);
+		amdgpu_device_post_asic_reset(tmp_adev, job_signaled);
 
 		if (r) {
 			/* bad news, how to tell it to userspace ? */
@@ -3648,7 +3699,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		amdgpu_device_unlock_adev(tmp_adev);
 	}
 
-	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+	if (xgmi_topology_present)
 		mutex_unlock(&hive->reset_lock);
 
 	if (r)
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/4] drm/amd/display: Restore deleted patch to resolve reset deadlock.
       [not found] ` <1554998624-25799-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  2019-04-11 16:03   ` [PATCH 3/4] drm/amdgpu: Avoid HW reset if guilty job already signaled Andrey Grodzovsky
@ 2019-04-11 16:03   ` Andrey Grodzovsky
  2019-04-11 16:41     ` Kazlauskas, Nicholas
       [not found]     ` <1554998624-25799-4-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 2 replies; 12+ messages in thread
From: Andrey Grodzovsky @ 2019-04-11 16:03 UTC (permalink / raw)
  To: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	eric-WhKQ6XTQaPysTnJN9+BGXg,
	etnaviv-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Andrey Grodzovsky, Nicholas.Kazlauskas-5C7GfCeVMHo

Patch '5edb0c9b Fix deadlock with display during hanged ring recovery'
was accidentally removed during one of DAL's code merges.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0648794..27e0383 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5138,14 +5138,21 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		 */
 		abo = gem_to_amdgpu_bo(fb->obj[0]);
 		r = amdgpu_bo_reserve(abo, true);
-		if (unlikely(r != 0)) {
+		if (unlikely(r != 0))
 			DRM_ERROR("failed to reserve buffer before flip\n");
-			WARN_ON(1);
-		}
 
-		/* Wait for all fences on this FB */
-		WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false,
-									    MAX_SCHEDULE_TIMEOUT) < 0);
+		/*
+		 * Wait for all fences on this FB. Do limited wait to avoid
+		 * deadlock during GPU reset when this fence will not signal
+		 * but we hold reservation lock for the BO.
+		 */
+		r = reservation_object_wait_timeout_rcu(abo->tbo.resv,
+							true, false,
+							msecs_to_jiffies(5000));
+		if (unlikely(r == 0))
+			DRM_ERROR("Waiting for fences timed out.");
+
+
 
 		amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] drm/amd/display: Restore deleted patch to resolve reset deadlock.
  2019-04-11 16:03   ` [PATCH 4/4] drm/amd/display: Restore deleted patch to resolve reset deadlock Andrey Grodzovsky
@ 2019-04-11 16:41     ` Kazlauskas, Nicholas
  2019-04-11 16:57       ` Grodzovsky, Andrey
       [not found]     ` <1554998624-25799-4-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 12+ messages in thread
From: Kazlauskas, Nicholas @ 2019-04-11 16:41 UTC (permalink / raw)
  To: Grodzovsky, Andrey, dri-devel, amd-gfx, ckoenig.leichtzumerken; +Cc: etnaviv

On 4/11/19 12:03 PM, Andrey Grodzovsky wrote:
> Patch '5edb0c9b Fix deadlock with display during hanged ring recovery'
> was accidentaly removed during one of DALs code merges.
> 
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>

Probably got lost in a refactor.

Also, didn't Christian have a patch recently to not lock the reservation 
object when waiting for the fence? Looks like that's missing too, or 
maybe it didn't get merged.

Nicholas Kazlauskas

> ---
>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +++++++++++++------
>   1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 0648794..27e0383 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -5138,14 +5138,21 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>   		 */
>   		abo = gem_to_amdgpu_bo(fb->obj[0]);
>   		r = amdgpu_bo_reserve(abo, true);
> -		if (unlikely(r != 0)) {
> +		if (unlikely(r != 0))
>   			DRM_ERROR("failed to reserve buffer before flip\n");
> -			WARN_ON(1);
> -		}
>   
> -		/* Wait for all fences on this FB */
> -		WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false,
> -									    MAX_SCHEDULE_TIMEOUT) < 0);
> +		/*
> +		 * Wait for all fences on this FB. Do limited wait to avoid
> +		 * deadlock during GPU reset when this fence will not signal
> +		 * but we hold reservation lock for the BO.
> +		 */
> +		r = reservation_object_wait_timeout_rcu(abo->tbo.resv,
> +							true, false,
> +							msecs_to_jiffies(5000));
> +		if (unlikely(r == 0))
> +			DRM_ERROR("Waiting for fences timed out.");
> +
> +
>   
>   		amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
>   
> 

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] drm/amd/display: Restore deleted patch to resolve reset deadlock.
  2019-04-11 16:41     ` Kazlauskas, Nicholas
@ 2019-04-11 16:57       ` Grodzovsky, Andrey
  0 siblings, 0 replies; 12+ messages in thread
From: Grodzovsky, Andrey @ 2019-04-11 16:57 UTC (permalink / raw)
  To: Kazlauskas, Nicholas, dri-devel, amd-gfx, ckoenig.leichtzumerken; +Cc: etnaviv


On 4/11/19 12:41 PM, Kazlauskas, Nicholas wrote:
> On 4/11/19 12:03 PM, Andrey Grodzovsky wrote:
>> Patch '5edb0c9b Fix deadlock with display during hanged ring recovery'
>> was accidentally removed during one of DAL's code merges.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
>
> Probably got lost in a refactor.
>
> Also, didn't Christian have a patch recently to not lock the reservation
> object when waiting for the fence? Looks like that's missing too, or
> maybe it didn't get merged.
>
> Nicholas Kazlauskas


This patch actually didn't help to resolve the deadlock - take a look at 
https://bugs.freedesktop.org/show_bug.cgi?id=109692 toward the end. I 
believe that the reason is that the fences attached to the FB BO in the 
reservation object are SW fences; they are detached from HW fences 
during reset and will only be attached back/signaled later in the 
reset sequence when we resubmit the jobs in the ring mirror list. So 
force signaling the HW fences that we do before suspending the display 
will have no effect. But I am not 100% sure about this.

Andrey

>
>> ---
>>    drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +++++++++++++------
>>    1 file changed, 13 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index 0648794..27e0383 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -5138,14 +5138,21 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>>    		 */
>>    		abo = gem_to_amdgpu_bo(fb->obj[0]);
>>    		r = amdgpu_bo_reserve(abo, true);
>> -		if (unlikely(r != 0)) {
>> +		if (unlikely(r != 0))
>>    			DRM_ERROR("failed to reserve buffer before flip\n");
>> -			WARN_ON(1);
>> -		}
>>    
>> -		/* Wait for all fences on this FB */
>> -		WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false,
>> -									    MAX_SCHEDULE_TIMEOUT) < 0);
>> +		/*
>> +		 * Wait for all fences on this FB. Do limited wait to avoid
>> +		 * deadlock during GPU reset when this fence will not signal
>> +		 * but we hold reservation lock for the BO.
>> +		 */
>> +		r = reservation_object_wait_timeout_rcu(abo->tbo.resv,
>> +							true, false,
>> +							msecs_to_jiffies(5000));
>> +		if (unlikely(r == 0))
>> +			DRM_ERROR("Waiting for fences timed out.");
>> +
>> +
>>    
>>    		amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
>>    
>>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: Avoid HW reset if guilty job already signaled.
  2019-04-11 16:03   ` [PATCH 3/4] drm/amdgpu: Avoid HW reset if guilty job already signaled Andrey Grodzovsky
@ 2019-04-12  7:39     ` Christian König
       [not found]       ` <04aa4b4a-019e-af55-4869-6deb7652f727-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2019-04-12  7:39 UTC (permalink / raw)
  To: Andrey Grodzovsky, dri-devel, amd-gfx, eric, etnaviv; +Cc: Nicholas.Kazlauskas

Am 11.04.19 um 18:03 schrieb Andrey Grodzovsky:
> Also reject TDRs if another one already running.
>
> v2:
> Stop all schedulers across device and entire XGMI hive before
> force signaling HW fences.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 125 ++++++++++++++++++++---------
>   1 file changed, 88 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index aabd043..ce9c494 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3327,7 +3327,8 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
>   
>   static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>   					struct amdgpu_job *job,
> -					bool *need_full_reset_arg)
> +					bool *need_full_reset_arg,
> +					bool job_signaled)
>   {
>   	int i, r = 0;
>   	bool need_full_reset  = *need_full_reset_arg;
> @@ -3339,8 +3340,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>   		if (!ring || !ring->sched.thread)
>   			continue;
>   
> -		drm_sched_stop(&ring->sched, &job->base);
> -
>   		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
>   		amdgpu_fence_driver_force_completion(ring);
>   	}
> @@ -3358,7 +3357,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>   
>   
>   
> -	if (!amdgpu_sriov_vf(adev)) {
> +	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
> +	if (!amdgpu_sriov_vf(adev) && !job_signaled) {
>   
>   		if (!need_full_reset)
>   			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
> @@ -3495,7 +3495,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
>   	return r;
>   }
>   
> -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
> +static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, bool job_signaled)
>   {
>   	int i;
>   
> @@ -3505,7 +3505,8 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>   		if (!ring || !ring->sched.thread)
>   			continue;
>   
> -		if (!adev->asic_reset_res)
> +		/* No point to resubmit jobs if we didn't HW reset*/
> +		if (!adev->asic_reset_res && !job_signaled)
>   			drm_sched_resubmit_jobs(&ring->sched);
>   
>   		drm_sched_start(&ring->sched, !adev->asic_reset_res);
> @@ -3518,14 +3519,21 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>   	adev->asic_reset_res = 0;
>   }
>   
> -static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
>   {
> -	mutex_lock(&adev->lock_reset);
> +	if (trylock) {
> +		if (!mutex_trylock(&adev->lock_reset))
> +			return false;
> +	} else
> +		mutex_lock(&adev->lock_reset);
> +
>   	atomic_inc(&adev->gpu_reset_counter);
>   	adev->in_gpu_reset = 1;
>   	/* Block kfd: SRIOV would do it separately */
>   	if (!amdgpu_sriov_vf(adev))
>                   amdgpu_amdkfd_pre_reset(adev);
> +
> +	return true;
>   }
>   
>   static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
> @@ -3553,38 +3561,43 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>   int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>   			      struct amdgpu_job *job)
>   {
> -	int r;
> +	int r, i;

BTW: Usual kernel coding style is to use reverse xmas tree. E.g. 
variables like "int i, r" last in the declarations.

>   	struct amdgpu_hive_info *hive = NULL;
> -	bool need_full_reset = false;
>   	struct amdgpu_device *tmp_adev = NULL;
>   	struct list_head device_list, *device_list_handle =  NULL;
> +	bool xgmi_topology_present, need_full_reset, job_signaled;
>   
> +	need_full_reset = job_signaled = false;
>   	INIT_LIST_HEAD(&device_list);
>   
>   	dev_info(adev->dev, "GPU reset begin!\n");
>   
> +	hive = amdgpu_get_xgmi_hive(adev, 0);

The second parameter should actually be a bool, so please use false here.

> +	xgmi_topology_present = hive && adev->gmc.xgmi.num_physical_nodes > 1;

Why are we actually checking num_physical_nodes here?

> +
>   	/*
> -	 * In case of XGMI hive disallow concurrent resets to be triggered
> -	 * by different nodes. No point also since the one node already executing
> -	 * reset will also reset all the other nodes in the hive.
> +	 * Here we trylock to avoid chain of resets executing from
> +	 * either trigger by jobs on different adevs in XGMI hive or jobs on
> +	 * different schedulers for same device while this tdr is running.

Can we please completely drop the name "tdr" and just write "timeout 
handler"?

> +	 * We always reset all schedulers for device and all devices for XGMI
> +	 * hive so that should take care of them too.
>   	 */
> -	hive = amdgpu_get_xgmi_hive(adev, 0);
> -	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
> -	    !mutex_trylock(&hive->reset_lock))
> +
> +	if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) {
> +		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
> +			 job->base.id, hive->hive_id);
>   		return 0;
> +	}
>   
>   	/* Start with adev pre asic reset first for soft reset check.*/
> -	amdgpu_device_lock_adev(adev);
> -	r = amdgpu_device_pre_asic_reset(adev,
> -					 job,
> -					 &need_full_reset);
> -	if (r) {
> -		/*TODO Should we stop ?*/
> -		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
> -			  r, adev->ddev->unique);
> -		adev->asic_reset_res = r;
> +	if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) {
> +		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
> +					 job->base.id);
> +		return 0;
>   	}
>   
> +	need_full_reset = amdgpu_device_ip_need_full_reset(adev);
> +
>   	/* Build list of devices to reset */
>   	if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {

We should probably unconditionally stop all nodes on all device first 
and then try to figure out if we actually need a full reset.

>   		if (!hive) {
> @@ -3603,16 +3616,52 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>   		device_list_handle = &device_list;
>   	}
>   
> +	/* block all schedulers and reset given job's ring */
> +	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
> +		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> +			struct amdgpu_ring *ring = tmp_adev->rings[i];
> +
> +			if (!ring || !ring->sched.thread)
> +				continue;
> +
> +			drm_sched_stop(&ring->sched, &job->base);
> +		}
> +	}
> +
> +	/*
> +	 * Must check guilty signal here since after this point all old
> +	 * HW fences are force signaled.
> +	 *
> +	 * job->base holds a reference to parent fence
> +	 */
> +	if (job && job->base.s_fence->parent &&
> +	    dma_fence_is_signaled(job->base.s_fence->parent))
> +		job_signaled = true;

It would probably be better if we have a "goto abort;" or something 
similar here.

This also avoids giving the job_signaled variable to all the sub functions.

Christian.

> +
> +
> +	/* Guilty job will be freed after this*/
> +	r = amdgpu_device_pre_asic_reset(adev,
> +					 job,
> +					 &need_full_reset,
> +					 job_signaled);
> +	if (r) {
> +		/*TODO Should we stop ?*/
> +		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
> +			  r, adev->ddev->unique);
> +		adev->asic_reset_res = r;
> +	}
> +
>   retry:	/* Rest of adevs pre asic reset from XGMI hive. */
>   	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>   
>   		if (tmp_adev == adev)
>   			continue;
>   
> -		amdgpu_device_lock_adev(tmp_adev);
> +		amdgpu_device_lock_adev(tmp_adev, false);
>   		r = amdgpu_device_pre_asic_reset(tmp_adev,
>   						 NULL,
> -						 &need_full_reset);
> +						 &need_full_reset,
> +						 job_signaled);
>   		/*TODO Should we stop ?*/
>   		if (r) {
>   			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
> @@ -3623,19 +3672,21 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>   
>   	/* Actual ASIC resets if needed.*/
>   	/* TODO Implement XGMI hive reset logic for SRIOV */
> -	if (amdgpu_sriov_vf(adev)) {
> -		r = amdgpu_device_reset_sriov(adev, job ? false : true);
> -		if (r)
> -			adev->asic_reset_res = r;
> -	} else {
> -		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
> -		if (r && r == -EAGAIN)
> -			goto retry;
> +	if (!job || !job_signaled) {
> +		if (amdgpu_sriov_vf(adev)) {
> +			r = amdgpu_device_reset_sriov(adev, job ? false : true);
> +			if (r)
> +				adev->asic_reset_res = r;
> +		} else {
> +			r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
> +			if (r && r == -EAGAIN)
> +				goto retry;
> +		}
>   	}
>   
>   	/* Post ASIC reset for all devs .*/
>   	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
> -		amdgpu_device_post_asic_reset(tmp_adev);
> +		amdgpu_device_post_asic_reset(tmp_adev, job_signaled);
>   
>   		if (r) {
>   			/* bad news, how to tell it to userspace ? */
> @@ -3648,7 +3699,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>   		amdgpu_device_unlock_adev(tmp_adev);
>   	}
>   
> -	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
> +	if (xgmi_topology_present)
>   		mutex_unlock(&hive->reset_lock);
>   
>   	if (r)

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] drm/amd/display: Restore deleted patch to resolve reset deadlock.
       [not found]     ` <1554998624-25799-4-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2019-04-12  7:40       ` Christian König
  2019-04-12 14:28         ` Grodzovsky, Andrey
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2019-04-12  7:40 UTC (permalink / raw)
  To: Andrey Grodzovsky, dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	eric-WhKQ6XTQaPysTnJN9+BGXg,
	etnaviv-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Nicholas.Kazlauskas-5C7GfCeVMHo

Am 11.04.19 um 18:03 schrieb Andrey Grodzovsky:
> Patch '5edb0c9b Fix deadlock with display during hanged ring recovery'
> was accidentally removed during one of DAL's code merges.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +++++++++++++------
>   1 file changed, 13 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 0648794..27e0383 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -5138,14 +5138,21 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>   		 */
>   		abo = gem_to_amdgpu_bo(fb->obj[0]);
>   		r = amdgpu_bo_reserve(abo, true);
> -		if (unlikely(r != 0)) {
> +		if (unlikely(r != 0))
>   			DRM_ERROR("failed to reserve buffer before flip\n");
> -			WARN_ON(1);
> -		}

I also already suggested to completely stop waiting while the BO is 
being reserved, but looks like that got dropped as well.

I would say something is seriously wrong with DALs development process here.

Christian.

>   
> -		/* Wait for all fences on this FB */
> -		WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false,
> -									    MAX_SCHEDULE_TIMEOUT) < 0);
> +		/*
> +		 * Wait for all fences on this FB. Do limited wait to avoid
> +		 * deadlock during GPU reset when this fence will not signal
> +		 * but we hold reservation lock for the BO.
> +		 */
> +		r = reservation_object_wait_timeout_rcu(abo->tbo.resv,
> +							true, false,
> +							msecs_to_jiffies(5000));
> +		if (unlikely(r == 0))
> +			DRM_ERROR("Waiting for fences timed out.");
> +
> +
>   
>   		amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] drm/amd/display: Restore deleted patch to resolve reset deadlock.
  2019-04-12  7:40       ` Christian König
@ 2019-04-12 14:28         ` Grodzovsky, Andrey
  0 siblings, 0 replies; 12+ messages in thread
From: Grodzovsky, Andrey @ 2019-04-12 14:28 UTC (permalink / raw)
  To: Koenig, Christian, dri-devel, amd-gfx, eric, etnaviv; +Cc: Kazlauskas, Nicholas

On 4/12/19 3:40 AM, Christian König wrote:
> Am 11.04.19 um 18:03 schrieb Andrey Grodzovsky:
>> Patch '5edb0c9b Fix deadlock with display during hanged ring recovery'
>> was accidentally removed during one of DAL's code merges.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 
>> +++++++++++++------
>>   1 file changed, 13 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> index 0648794..27e0383 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>> @@ -5138,14 +5138,21 @@ static void amdgpu_dm_commit_planes(struct 
>> drm_atomic_state *state,
>>            */
>>           abo = gem_to_amdgpu_bo(fb->obj[0]);
>>           r = amdgpu_bo_reserve(abo, true);
>> -        if (unlikely(r != 0)) {
>> +        if (unlikely(r != 0))
>>               DRM_ERROR("failed to reserve buffer before flip\n");
>> -            WARN_ON(1);
>> -        }
>
> I also already suggested to completely stop waiting while the BO is 
> being reserved, but looks like that got dropped as well.
>
> I would say something is seriously wrong with DALs development process 
> here.
>
> Christian.


Yea, I think your patch that moved the wait out of the reserved section 
got dropped as well. When I respin the series with your comments for 
the TDR stuff I will also add a patch restoring your change.

Andrey


>
>>   -        /* Wait for all fences on this FB */
>> - WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, 
>> false,
>> -                                        MAX_SCHEDULE_TIMEOUT) < 0);
>> +        /*
>> +         * Wait for all fences on this FB. Do limited wait to avoid
>> +         * deadlock during GPU reset when this fence will not signal
>> +         * but we hold reservation lock for the BO.
>> +         */
>> +        r = reservation_object_wait_timeout_rcu(abo->tbo.resv,
>> +                            true, false,
>> +                            msecs_to_jiffies(5000));
>> +        if (unlikely(r == 0))
>> +            DRM_ERROR("Waiting for fences timed out.");
>> +
>> +
>>             amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
>
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: Avoid HW reset if guilty job already signaled.
       [not found]       ` <04aa4b4a-019e-af55-4869-6deb7652f727-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-04-12 15:53         ` Grodzovsky, Andrey
       [not found]           ` <fc3db187-8402-3ada-817c-f28d3348b1d9-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Grodzovsky, Andrey @ 2019-04-12 15:53 UTC (permalink / raw)
  To: Koenig, Christian, dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	eric-WhKQ6XTQaPysTnJN9+BGXg,
	etnaviv-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kazlauskas, Nicholas


On 4/12/19 3:39 AM, Christian König wrote:
> Am 11.04.19 um 18:03 schrieb Andrey Grodzovsky:
>> Also reject TDRs if another one already running.
>>
>> v2:
>> Stop all schedulers across device and entire XGMI hive before
>> force signaling HW fences.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 125 
>> ++++++++++++++++++++---------
>>   1 file changed, 88 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index aabd043..ce9c494 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3327,7 +3327,8 @@ bool amdgpu_device_should_recover_gpu(struct 
>> amdgpu_device *adev)
>>     static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>                       struct amdgpu_job *job,
>> -                    bool *need_full_reset_arg)
>> +                    bool *need_full_reset_arg,
>> +                    bool job_signaled)
>>   {
>>       int i, r = 0;
>>       bool need_full_reset  = *need_full_reset_arg;
>> @@ -3339,8 +3340,6 @@ static int amdgpu_device_pre_asic_reset(struct 
>> amdgpu_device *adev,
>>           if (!ring || !ring->sched.thread)
>>               continue;
>>   -        drm_sched_stop(&ring->sched, &job->base);
>> -
>>           /* after all hw jobs are reset, hw fence is meaningless, so 
>> force_completion */
>>           amdgpu_fence_driver_force_completion(ring);
>>       }
>> @@ -3358,7 +3357,8 @@ static int amdgpu_device_pre_asic_reset(struct 
>> amdgpu_device *adev,
>>       -    if (!amdgpu_sriov_vf(adev)) {
>> +    /* Don't suspend on bare metal if we are not going to HW reset 
>> the ASIC */
>> +    if (!amdgpu_sriov_vf(adev) && !job_signaled) {
>>             if (!need_full_reset)
>>               need_full_reset = amdgpu_device_ip_need_full_reset(adev);
>> @@ -3495,7 +3495,7 @@ static int amdgpu_do_asic_reset(struct 
>> amdgpu_hive_info *hive,
>>       return r;
>>   }
>>   -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device 
>> *adev, bool job_signaled)
>>   {
>>       int i;
>>   @@ -3505,7 +3505,8 @@ static void 
>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>           if (!ring || !ring->sched.thread)
>>               continue;
>>   -        if (!adev->asic_reset_res)
>> +        /* No point to resubmit jobs if we didn't HW reset*/
>> +        if (!adev->asic_reset_res && !job_signaled)
>>               drm_sched_resubmit_jobs(&ring->sched);
>>             drm_sched_start(&ring->sched, !adev->asic_reset_res);
>> @@ -3518,14 +3519,21 @@ static void 
>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>       adev->asic_reset_res = 0;
>>   }
>>   -static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool 
>> trylock)
>>   {
>> -    mutex_lock(&adev->lock_reset);
>> +    if (trylock) {
>> +        if (!mutex_trylock(&adev->lock_reset))
>> +            return false;
>> +    } else
>> +        mutex_lock(&adev->lock_reset);
>> +
>>       atomic_inc(&adev->gpu_reset_counter);
>>       adev->in_gpu_reset = 1;
>>       /* Block kfd: SRIOV would do it separately */
>>       if (!amdgpu_sriov_vf(adev))
>>                   amdgpu_amdkfd_pre_reset(adev);
>> +
>> +    return true;
>>   }
>>     static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>> @@ -3553,38 +3561,43 @@ static void amdgpu_device_unlock_adev(struct 
>> amdgpu_device *adev)
>>   int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>                     struct amdgpu_job *job)
>>   {
>> -    int r;
>> +    int r, i;
>
> BTW: Usual kernel coding style is to use reverse xmas tree. E.g. 
> variables like "int i, r" last in the declarations.
>
>>       struct amdgpu_hive_info *hive = NULL;
>> -    bool need_full_reset = false;
>>       struct amdgpu_device *tmp_adev = NULL;
>>       struct list_head device_list, *device_list_handle =  NULL;
>> +    bool xgmi_topology_present, need_full_reset, job_signaled;
>>   +    need_full_reset = job_signaled = false;
>>       INIT_LIST_HEAD(&device_list);
>>         dev_info(adev->dev, "GPU reset begin!\n");
>>   +    hive = amdgpu_get_xgmi_hive(adev, 0);
>
> The second parameter should actually be a bool, so please use false here.
>
>> +    xgmi_topology_present = hive && 
>> adev->gmc.xgmi.num_physical_nodes > 1;
>
> Why are we actually checking num_physical_nodes here?

That's the usual way of knowing you have XGMI topology, but having 
hive!=NULL should be equivalent so I can remove it.


>
>> +
>>       /*
>> -     * In case of XGMI hive disallow concurrent resets to be triggered
>> -     * by different nodes. No point also since the one node already 
>> executing
>> -     * reset will also reset all the other nodes in the hive.
>> +     * Here we trylock to avoid chain of resets executing from
>> +     * either trigger by jobs on different adevs in XGMI hive or 
>> jobs on
>> +     * different schedulers for same device while this tdr is running.
>
> Can we please completely drop the name "tdr" and just write "timeout 
> handler"?
>
>> +     * We always reset all schedulers for device and all devices for 
>> XGMI
>> +     * hive so that should take care of them too.
>>        */
>> -    hive = amdgpu_get_xgmi_hive(adev, 0);
>> -    if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
>> -        !mutex_trylock(&hive->reset_lock))
>> +
>> +    if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) {
>> +        DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
>> another already in progress",
>> +             job->base.id, hive->hive_id);
>>           return 0;
>> +    }
>>         /* Start with adev pre asic reset first for soft reset check.*/
>> -    amdgpu_device_lock_adev(adev);
>> -    r = amdgpu_device_pre_asic_reset(adev,
>> -                     job,
>> -                     &need_full_reset);
>> -    if (r) {
>> -        /*TODO Should we stop ?*/
>> -        DRM_ERROR("GPU pre asic reset failed with err, %d for drm 
>> dev, %s ",
>> -              r, adev->ddev->unique);
>> -        adev->asic_reset_res = r;
>> +    if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) {
>> +        DRM_INFO("Bailing on TDR for s_job:%llx, as another already 
>> in progress",
>> +                     job->base.id);
>> +        return 0;
>>       }
>>   +    need_full_reset = amdgpu_device_ip_need_full_reset(adev);
>> +
>>       /* Build list of devices to reset */
>>       if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
>
> We should probably unconditionally stop all nodes on all device first 
> and then try to figure out if we actually need a full reset.
>
>>           if (!hive) {
>> @@ -3603,16 +3616,52 @@ int amdgpu_device_gpu_recover(struct 
>> amdgpu_device *adev,
>>           device_list_handle = &device_list;
>>       }
>>   +    /* block all schedulers and reset given job's ring */
>> +    list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>> +        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>> +            struct amdgpu_ring *ring = tmp_adev->rings[i];
>> +
>> +            if (!ring || !ring->sched.thread)
>> +                continue;
>> +
>> +            drm_sched_stop(&ring->sched, &job->base);
>> +        }
>> +    }
>> +
>> +    /*
>> +     * Must check guilty signal here since after this point all old
>> +     * HW fences are force signaled.
>> +     *
>> +     * job->base holds a reference to parent fence
>> +     */
>> +    if (job && job->base.s_fence->parent &&
>> +        dma_fence_is_signaled(job->base.s_fence->parent))
>> +        job_signaled = true;
>
> It would probably be better if we have a "goto abort;" or something 
> similar here.
>
> This also avoids giving the job_signaled variable to all the sub 
> functions.
>
> Christian.


I agree this would be good in case of amdgpu_device_pre_asic_reset 
because we can totally skip this function if guilty job already 
signaled, but for amdgpu_device_post_asic_reset it creates complications 
because drm_sched_start is right in the middle there after 
drm_sched_resubmit_jobs but before forcing back set mode in display so I 
prefer to keep passing 'job_signaled' to amdgpu_device_post_asic_reset. 
Alternative is to get rid of this function and bring it's body into 
amdgpu_device_gpu_recover which is already pretty cluttered and 
confusing.  What do you think ?

Andrey

>
>> +
>> +
>> +    /* Guilty job will be freed after this*/
>> +    r = amdgpu_device_pre_asic_reset(adev,
>> +                     job,
>> +                     &need_full_reset,
>> +                     job_signaled);
>> +    if (r) {
>> +        /*TODO Should we stop ?*/
>> +        DRM_ERROR("GPU pre asic reset failed with err, %d for drm 
>> dev, %s ",
>> +              r, adev->ddev->unique);
>> +        adev->asic_reset_res = r;
>> +    }
>> +
>>   retry:    /* Rest of adevs pre asic reset from XGMI hive. */
>>       list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>             if (tmp_adev == adev)
>>               continue;
>>   -        amdgpu_device_lock_adev(tmp_adev);
>> +        amdgpu_device_lock_adev(tmp_adev, false);
>>           r = amdgpu_device_pre_asic_reset(tmp_adev,
>>                            NULL,
>> -                         &need_full_reset);
>> +                         &need_full_reset,
>> +                         job_signaled);
>>           /*TODO Should we stop ?*/
>>           if (r) {
>>               DRM_ERROR("GPU pre asic reset failed with err, %d for 
>> drm dev, %s ",
>> @@ -3623,19 +3672,21 @@ int amdgpu_device_gpu_recover(struct 
>> amdgpu_device *adev,
>>         /* Actual ASIC resets if needed.*/
>>       /* TODO Implement XGMI hive reset logic for SRIOV */
>> -    if (amdgpu_sriov_vf(adev)) {
>> -        r = amdgpu_device_reset_sriov(adev, job ? false : true);
>> -        if (r)
>> -            adev->asic_reset_res = r;
>> -    } else {
>> -        r  = amdgpu_do_asic_reset(hive, device_list_handle, 
>> &need_full_reset);
>> -        if (r && r == -EAGAIN)
>> -            goto retry;
>> +    if (!job || !job_signaled) {
>> +        if (amdgpu_sriov_vf(adev)) {
>> +            r = amdgpu_device_reset_sriov(adev, job ? false : true);
>> +            if (r)
>> +                adev->asic_reset_res = r;
>> +        } else {
>> +            r  = amdgpu_do_asic_reset(hive, device_list_handle, 
>> &need_full_reset);
>> +            if (r && r == -EAGAIN)
>> +                goto retry;
>> +        }
>>       }
>>         /* Post ASIC reset for all devs .*/
>>       list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>> -        amdgpu_device_post_asic_reset(tmp_adev);
>> +        amdgpu_device_post_asic_reset(tmp_adev, job_signaled);
>>             if (r) {
>>               /* bad news, how to tell it to userspace ? */
>> @@ -3648,7 +3699,7 @@ int amdgpu_device_gpu_recover(struct 
>> amdgpu_device *adev,
>>           amdgpu_device_unlock_adev(tmp_adev);
>>       }
>>   -    if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
>> +    if (xgmi_topology_present)
>>           mutex_unlock(&hive->reset_lock);
>>         if (r)
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: Avoid HW reset if guilty job already signaled.
       [not found]           ` <fc3db187-8402-3ada-817c-f28d3348b1d9-5C7GfCeVMHo@public.gmane.org>
@ 2019-04-15  6:46             ` Koenig, Christian
  2019-04-15 15:15               ` Grodzovsky, Andrey
  0 siblings, 1 reply; 12+ messages in thread
From: Koenig, Christian @ 2019-04-15  6:46 UTC (permalink / raw)
  To: Grodzovsky, Andrey, dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	eric-WhKQ6XTQaPysTnJN9+BGXg,
	etnaviv-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Kazlauskas, Nicholas

Am 12.04.19 um 17:53 schrieb Grodzovsky, Andrey:
> On 4/12/19 3:39 AM, Christian König wrote:
>> Am 11.04.19 um 18:03 schrieb Andrey Grodzovsky:
>>> Also reject TDRs if another one already running.
>>>
>>> v2:
>>> Stop all schedulers across device and entire XGMI hive before
>>> force signaling HW fences.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 125
>>> ++++++++++++++++++++---------
>>>    1 file changed, 88 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index aabd043..ce9c494 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3327,7 +3327,8 @@ bool amdgpu_device_should_recover_gpu(struct
>>> amdgpu_device *adev)
>>>      static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>>                        struct amdgpu_job *job,
>>> -                    bool *need_full_reset_arg)
>>> +                    bool *need_full_reset_arg,
>>> +                    bool job_signaled)
>>>    {
>>>        int i, r = 0;
>>>        bool need_full_reset  = *need_full_reset_arg;
>>> @@ -3339,8 +3340,6 @@ static int amdgpu_device_pre_asic_reset(struct
>>> amdgpu_device *adev,
>>>            if (!ring || !ring->sched.thread)
>>>                continue;
>>>    -        drm_sched_stop(&ring->sched, &job->base);
>>> -
>>>            /* after all hw jobs are reset, hw fence is meaningless, so
>>> force_completion */
>>>            amdgpu_fence_driver_force_completion(ring);
>>>        }
>>> @@ -3358,7 +3357,8 @@ static int amdgpu_device_pre_asic_reset(struct
>>> amdgpu_device *adev,
>>>        -    if (!amdgpu_sriov_vf(adev)) {
>>> +    /* Don't suspend on bare metal if we are not going to HW reset
>>> the ASIC */
>>> +    if (!amdgpu_sriov_vf(adev) && !job_signaled) {
>>>              if (!need_full_reset)
>>>                need_full_reset = amdgpu_device_ip_need_full_reset(adev);
>>> @@ -3495,7 +3495,7 @@ static int amdgpu_do_asic_reset(struct
>>> amdgpu_hive_info *hive,
>>>        return r;
>>>    }
>>>    -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device
>>> *adev, bool job_signaled)
>>>    {
>>>        int i;
>>>    @@ -3505,7 +3505,8 @@ static void
>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>>            if (!ring || !ring->sched.thread)
>>>                continue;
>>>    -        if (!adev->asic_reset_res)
>>> +        /* No point to resubmit jobs if we didn't HW reset*/
>>> +        if (!adev->asic_reset_res && !job_signaled)
>>>                drm_sched_resubmit_jobs(&ring->sched);
>>>              drm_sched_start(&ring->sched, !adev->asic_reset_res);
>>> @@ -3518,14 +3519,21 @@ static void
>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>>        adev->asic_reset_res = 0;
>>>    }
>>>    -static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
>>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool
>>> trylock)
>>>    {
>>> -    mutex_lock(&adev->lock_reset);
>>> +    if (trylock) {
>>> +        if (!mutex_trylock(&adev->lock_reset))
>>> +            return false;
>>> +    } else
>>> +        mutex_lock(&adev->lock_reset);
>>> +
>>>        atomic_inc(&adev->gpu_reset_counter);
>>>        adev->in_gpu_reset = 1;
>>>        /* Block kfd: SRIOV would do it separately */
>>>        if (!amdgpu_sriov_vf(adev))
>>>                    amdgpu_amdkfd_pre_reset(adev);
>>> +
>>> +    return true;
>>>    }
>>>      static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>>> @@ -3553,38 +3561,43 @@ static void amdgpu_device_unlock_adev(struct
>>> amdgpu_device *adev)
>>>    int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>>                      struct amdgpu_job *job)
>>>    {
>>> -    int r;
>>> +    int r, i;
>> BTW: Usual kernel coding style is to use reverse xmas tree. E.g.
>> variables like "int i, r" last in the declarations.
>>
>>>        struct amdgpu_hive_info *hive = NULL;
>>> -    bool need_full_reset = false;
>>>        struct amdgpu_device *tmp_adev = NULL;
>>>        struct list_head device_list, *device_list_handle =  NULL;
>>> +    bool xgmi_topology_present, need_full_reset, job_signaled;
>>>    +    need_full_reset = job_signaled = false;
>>>        INIT_LIST_HEAD(&device_list);
>>>          dev_info(adev->dev, "GPU reset begin!\n");
>>>    +    hive = amdgpu_get_xgmi_hive(adev, 0);
>> The second parameter should actually be a bool, so please use false here.
>>
>>> +    xgmi_topology_present = hive &&
>>> adev->gmc.xgmi.num_physical_nodes > 1;
>> Why are we actually checking num_physical_nodes here?
> That's the usual way of knowing you have XGMI topology, but having
> hive!=NULL should be equivalent so I can remove it.
>
>
>>> +
>>>        /*
>>> -     * In case of XGMI hive disallow concurrent resets to be triggered
>>> -     * by different nodes. No point also since the one node already
>>> executing
>>> -     * reset will also reset all the other nodes in the hive.
>>> +     * Here we trylock to avoid chain of resets executing from
>>> +     * either trigger by jobs on different adevs in XGMI hive or
>>> jobs on
>>> +     * different schedulers for same device while this tdr is running.
>> Can we please completely drop the name "tdr" and just write "timeout
>> handler"?
>>
>>> +     * We always reset all schedulers for device and all devices for
>>> XGMI
>>> +     * hive so that should take care of them too.
>>>         */
>>> -    hive = amdgpu_get_xgmi_hive(adev, 0);
>>> -    if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
>>> -        !mutex_trylock(&hive->reset_lock))
>>> +
>>> +    if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) {
>>> +        DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as
>>> another already in progress",
>>> +             job->base.id, hive->hive_id);
>>>            return 0;
>>> +    }
>>>          /* Start with adev pre asic reset first for soft reset check.*/
>>> -    amdgpu_device_lock_adev(adev);
>>> -    r = amdgpu_device_pre_asic_reset(adev,
>>> -                     job,
>>> -                     &need_full_reset);
>>> -    if (r) {
>>> -        /*TODO Should we stop ?*/
>>> -        DRM_ERROR("GPU pre asic reset failed with err, %d for drm
>>> dev, %s ",
>>> -              r, adev->ddev->unique);
>>> -        adev->asic_reset_res = r;
>>> +    if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) {
>>> +        DRM_INFO("Bailing on TDR for s_job:%llx, as another already
>>> in progress",
>>> +                     job->base.id);
>>> +        return 0;
>>>        }
>>>    +    need_full_reset = amdgpu_device_ip_need_full_reset(adev);
>>> +
>>>        /* Build list of devices to reset */
>>>        if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
>> We should probably unconditionally stop all nodes on all device first
>> and then try to figure out if we actually need a full reset.
>>
>>>            if (!hive) {
>>> @@ -3603,16 +3616,52 @@ int amdgpu_device_gpu_recover(struct
>>> amdgpu_device *adev,
>>>            device_list_handle = &device_list;
>>>        }
>>>    +    /* block all schedulers and reset given job's ring */
>>> +    list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>> +        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>> +            struct amdgpu_ring *ring = tmp_adev->rings[i];
>>> +
>>> +            if (!ring || !ring->sched.thread)
>>> +                continue;
>>> +
>>> +            drm_sched_stop(&ring->sched, &job->base);
>>> +        }
>>> +    }
>>> +
>>> +    /*
>>> +     * Must check guilty signal here since after this point all old
>>> +     * HW fences are force signaled.
>>> +     *
>>> +     * job->base holds a reference to parent fence
>>> +     */
>>> +    if (job && job->base.s_fence->parent &&
>>> +        dma_fence_is_signaled(job->base.s_fence->parent))
>>> +        job_signaled = true;
>> It would probably be better if we have a "goto abort;" or something
>> similar here.
>>
>> This also avoids giving the job_signaled variable to all the sub
>> functions.
>>
>> Christian.
>
> I agree this would be good in case of amdgpu_device_pre_asic_reset
> because we can totally skip this function if guilty job already
> signaled, but for amdgpu_device_post_asic_reset it creates complications
> because drm_sched_start is right in the middle there after
> drm_sched_resubmit_jobs but before forcing back set mode in display so I
> prefer to keep passing 'job_signaled' to amdgpu_device_post_asic_reset.
> Alternative is to get rid of this function and bring it's body into
> amdgpu_device_gpu_recover which is already pretty cluttered and
> confusing.  What do you think ?

Yeah, that's what I meant with that this still looks rather unorganized.

How about splitting up amdgpu_device_post_asic_reset into more 
functions? Would that work out as well?

I think what we should do is to keep amdgpu_device_gpu_recover() the top 
level control logic, e.g. which step is called on which device in which 
order.

We should not push that decision into the individual steps, because that 
would make things even more confusing.

Regards,
Christian.

>
> Andrey
>
>>> +
>>> +
>>> +    /* Guilty job will be freed after this*/
>>> +    r = amdgpu_device_pre_asic_reset(adev,
>>> +                     job,
>>> +                     &need_full_reset,
>>> +                     job_signaled);
>>> +    if (r) {
>>> +        /*TODO Should we stop ?*/
>>> +        DRM_ERROR("GPU pre asic reset failed with err, %d for drm
>>> dev, %s ",
>>> +              r, adev->ddev->unique);
>>> +        adev->asic_reset_res = r;
>>> +    }
>>> +
>>>    retry:    /* Rest of adevs pre asic reset from XGMI hive. */
>>>        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>>              if (tmp_adev == adev)
>>>                continue;
>>>    -        amdgpu_device_lock_adev(tmp_adev);
>>> +        amdgpu_device_lock_adev(tmp_adev, false);
>>>            r = amdgpu_device_pre_asic_reset(tmp_adev,
>>>                             NULL,
>>> -                         &need_full_reset);
>>> +                         &need_full_reset,
>>> +                         job_signaled);
>>>            /*TODO Should we stop ?*/
>>>            if (r) {
>>>                DRM_ERROR("GPU pre asic reset failed with err, %d for
>>> drm dev, %s ",
>>> @@ -3623,19 +3672,21 @@ int amdgpu_device_gpu_recover(struct
>>> amdgpu_device *adev,
>>>          /* Actual ASIC resets if needed.*/
>>>        /* TODO Implement XGMI hive reset logic for SRIOV */
>>> -    if (amdgpu_sriov_vf(adev)) {
>>> -        r = amdgpu_device_reset_sriov(adev, job ? false : true);
>>> -        if (r)
>>> -            adev->asic_reset_res = r;
>>> -    } else {
>>> -        r  = amdgpu_do_asic_reset(hive, device_list_handle,
>>> &need_full_reset);
>>> -        if (r && r == -EAGAIN)
>>> -            goto retry;
>>> +    if (!job || !job_signaled) {
>>> +        if (amdgpu_sriov_vf(adev)) {
>>> +            r = amdgpu_device_reset_sriov(adev, job ? false : true);
>>> +            if (r)
>>> +                adev->asic_reset_res = r;
>>> +        } else {
>>> +            r  = amdgpu_do_asic_reset(hive, device_list_handle,
>>> &need_full_reset);
>>> +            if (r && r == -EAGAIN)
>>> +                goto retry;
>>> +        }
>>>        }
>>>          /* Post ASIC reset for all devs .*/
>>>        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>> -        amdgpu_device_post_asic_reset(tmp_adev);
>>> +        amdgpu_device_post_asic_reset(tmp_adev, job_signaled);
>>>              if (r) {
>>>                /* bad news, how to tell it to userspace ? */
>>> @@ -3648,7 +3699,7 @@ int amdgpu_device_gpu_recover(struct
>>> amdgpu_device *adev,
>>>            amdgpu_device_unlock_adev(tmp_adev);
>>>        }
>>>    -    if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
>>> +    if (xgmi_topology_present)
>>>            mutex_unlock(&hive->reset_lock);
>>>          if (r)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 3/4] drm/amdgpu: Avoid HW reset if guilty job already signaled.
  2019-04-15  6:46             ` Koenig, Christian
@ 2019-04-15 15:15               ` Grodzovsky, Andrey
  0 siblings, 0 replies; 12+ messages in thread
From: Grodzovsky, Andrey @ 2019-04-15 15:15 UTC (permalink / raw)
  To: Koenig, Christian, dri-devel, amd-gfx, eric, etnaviv; +Cc: Kazlauskas, Nicholas


[-- Attachment #1.1: Type: text/plain, Size: 1445 bytes --]


On 4/15/19 2:46 AM, Koenig, Christian wrote:

I agree this would be good in case of amdgpu_device_pre_asic_reset
because we can totally skip this function if guilty job already
signaled, but for amdgpu_device_post_asic_reset it creates complications
because drm_sched_start is right in the middle there after
drm_sched_resubmit_jobs but before forcing back set mode in display so I
prefer to keep passing 'job_signaled' to amdgpu_device_post_asic_reset.
Alternative is to get rid of this function and bring it's body into
amdgpu_device_gpu_recover which is already pretty cluttered and
confusing.  What do you think ?


Yeah, that's what I meant with that this still looks rather unorganized.

How about splitting up amdgpu_device_post_asic_reset into more
functions? Would that work out as well?

I looked into it and seems the simplest way is just to move this function body into the main reset function since it's pretty short function and there is internal loop across all rings inside.

I resent the patches and also amended your lost display  patch 'wait for fence without holding reservation lock'.

Andrey




I think what we should do is to keep amdgpu_device_gpu_recover() the top
level control logic, e.g. which step is called on which device in which
order.

We should not push that decision into the individual steps, because that
would make things even more confusing.

Regards,
Christian.



[-- Attachment #1.2: Type: text/html, Size: 2076 bytes --]

[-- Attachment #2: Type: text/plain, Size: 159 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-04-15 15:15 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-04-11 16:03 [PATCH 1/4] drm/scheduler: rework job destruction Andrey Grodzovsky
2019-04-11 16:03 ` [PATCH 2/4] drm/sched: Keep s_fence->parent pointer Andrey Grodzovsky
     [not found] ` <1554998624-25799-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2019-04-11 16:03   ` [PATCH 3/4] drm/amdgpu: Avoid HW reset if guilty job already signaled Andrey Grodzovsky
2019-04-12  7:39     ` Christian König
     [not found]       ` <04aa4b4a-019e-af55-4869-6deb7652f727-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-04-12 15:53         ` Grodzovsky, Andrey
     [not found]           ` <fc3db187-8402-3ada-817c-f28d3348b1d9-5C7GfCeVMHo@public.gmane.org>
2019-04-15  6:46             ` Koenig, Christian
2019-04-15 15:15               ` Grodzovsky, Andrey
2019-04-11 16:03   ` [PATCH 4/4] drm/amd/display: Restore deleted patch to resolve reset deadlock Andrey Grodzovsky
2019-04-11 16:41     ` Kazlauskas, Nicholas
2019-04-11 16:57       ` Grodzovsky, Andrey
     [not found]     ` <1554998624-25799-4-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2019-04-12  7:40       ` Christian König
2019-04-12 14:28         ` Grodzovsky, Andrey

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.