All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/5] drm/amdgpu:cleanup job reset routine
@ 2017-10-23  5:34 Monk Liu
       [not found] ` <1508736862-8873-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Monk Liu @ 2017-10-23  5:34 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

merge the setting guilty on context into this function
to avoid implement extra routine.

Change-Id: I7a0063464fdc85d5ac9080046380e745565ff540
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  4 ++--
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 13 ++++++++++++-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 +-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index dae1e5b..a07544d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2868,7 +2868,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
 			amd_sched_job_kickout(&job->base);
 
 		/* only do job_reset on the hang ring if @job not NULL */
-		amd_sched_hw_job_reset(&ring->sched);
+		amd_sched_hw_job_reset(&ring->sched, NULL);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
@@ -2989,7 +2989,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 		if (!ring || !ring->sched.thread)
 			continue;
 		kthread_park(ring->sched.thread);
-		amd_sched_hw_job_reset(&ring->sched);
+		amd_sched_hw_job_reset(&ring->sched, NULL);
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index e4d3b4e..322b6c1 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -442,7 +442,14 @@ static void amd_sched_job_timedout(struct work_struct *work)
 	job->sched->ops->timedout_job(job);
 }
 
-void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
+static void amd_sched_set_guilty(struct amd_sched_job *s_job)
+{
+	if (atomic_inc_return(&s_job->karma) > s_job->sched->hang_limit)
+		if (s_job->s_entity->guilty)
+			atomic_set(s_job->s_entity->guilty, 1);
+}
+
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *bad)
 {
 	struct amd_sched_job *s_job;
 
@@ -455,6 +462,10 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 			s_job->s_fence->parent = NULL;
 			atomic_dec(&sched->hw_rq_count);
 		}
+
+		if (bad && bad == s_job) {
+			amd_sched_set_guilty(s_job);
+		}
 	}
 	spin_unlock(&sched->job_list_lock);
 }
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 52c8e54..d26e778 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -170,7 +170,7 @@ int amd_sched_job_init(struct amd_sched_job *job,
 		       struct amd_gpu_scheduler *sched,
 		       struct amd_sched_entity *entity,
 		       void *owner);
-void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *job);
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
 bool amd_sched_dependency_optimized(struct dma_fence* fence,
 				    struct amd_sched_entity *entity);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/5] drm/amdgpu:add hang_limit for sched(v2)
       [not found] ` <1508736862-8873-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-23  5:34   ` Monk Liu
  2017-10-23  5:34   ` [PATCH 3/5] drm/amd/scheduler:introduce guilty pointer member Monk Liu
                     ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Monk Liu @ 2017-10-23  5:34 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

since gpu_scheduler source domain cannot access amdgpu variable
so need create the hang_limit membewr for sched, and it can
refer it for the upcoming GPU RESET patches

v2:
make hang_limit a parameter of sched_init()

Change-Id: I977ae2717e55a8b87c59e58a288bffc3b458b653
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c     | 2 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 6 +++++-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 2167dac..f43319c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -446,7 +446,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 			timeout = MAX_SCHEDULE_TIMEOUT;
 		}
 		r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
-				   num_hw_submission,
+				   num_hw_submission, amdgpu_job_hang_limit,
 				   timeout, ring->name);
 		if (r) {
 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 322b6c1..dfda486 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -686,13 +686,17 @@ static int amd_sched_main(void *param)
 */
 int amd_sched_init(struct amd_gpu_scheduler *sched,
 		   const struct amd_sched_backend_ops *ops,
-		   unsigned hw_submission, long timeout, const char *name)
+		   unsigned hw_submission,
+		   unsigned hang_limit,
+		   long timeout,
+		   const char *name)
 {
 	int i;
 	sched->ops = ops;
 	sched->hw_submission_limit = hw_submission;
 	sched->name = name;
 	sched->timeout = timeout;
+	sched->hang_limit = hang_limit;
 	for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++)
 		amd_sched_rq_init(&sched->sched_rq[i]);
 
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index d26e778..774b5da 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -142,11 +142,12 @@ struct amd_gpu_scheduler {
 	struct task_struct		*thread;
 	struct list_head	ring_mirror_list;
 	spinlock_t			job_list_lock;
+	int hang_limit;
 };
 
 int amd_sched_init(struct amd_gpu_scheduler *sched,
 		   const struct amd_sched_backend_ops *ops,
-		   uint32_t hw_submission, long timeout, const char *name);
+		   uint32_t hw_submission, unsigned hang_limit, long timeout, const char *name);
 void amd_sched_fini(struct amd_gpu_scheduler *sched);
 
 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/5] drm/amd/scheduler:introduce guilty pointer member
       [not found] ` <1508736862-8873-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-10-23  5:34   ` [PATCH 2/5] drm/amdgpu:add hang_limit for sched(v2) Monk Liu
@ 2017-10-23  5:34   ` Monk Liu
  2017-10-23  5:34   ` [PATCH 4/5] drm/amdgpu:pass ctx->guilty address to entity init Monk Liu
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Monk Liu @ 2017-10-23  5:34 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

this member will be used later, it will points to
the real var inside of context and CS_SUBMIT & gpu schdduler
can decide if skip a job depends on context->guilty or *entity->guilty

Change-Id: I411e117a01c54286db0765fd2f6bf9d3bda01a3b
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c       | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c       | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c       | 4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c         | 2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c         | 2 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 3 ++-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 3 ++-
 10 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index c184468..bb5a46a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -90,7 +90,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			continue;
 
 		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
-					  rq, amdgpu_sched_jobs);
+					  rq, amdgpu_sched_jobs, NULL);
 		if (r)
 			goto failed;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index dcdfb8d..af65cb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -110,7 +110,7 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
 	ring = adev->mman.buffer_funcs_ring;
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
 	r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
-				  rq, amdgpu_sched_jobs);
+				  rq, amdgpu_sched_jobs, NULL);
 	if (r) {
 		DRM_ERROR("Failed setting up TTM BO move run queue.\n");
 		goto error_entity;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e8bd50c..6604771 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -232,7 +232,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	ring = &adev->uvd.ring;
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 	r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity,
-				  rq, amdgpu_sched_jobs);
+				  rq, amdgpu_sched_jobs, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up UVD run queue.\n");
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index b46280c..60a8fb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -176,7 +176,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	ring = &adev->vce.ring[0];
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 	r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
-				  rq, amdgpu_sched_jobs);
+				  rq, amdgpu_sched_jobs, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCE run queue.\n");
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 041e012..96df21c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -106,7 +106,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 	ring = &adev->vcn.ring_dec;
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 	r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec,
-				  rq, amdgpu_sched_jobs);
+				  rq, amdgpu_sched_jobs, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCN dec run queue.\n");
 		return r;
@@ -115,7 +115,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 	ring = &adev->vcn.ring_enc[0];
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 	r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc,
-				  rq, amdgpu_sched_jobs);
+				  rq, amdgpu_sched_jobs, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCN enc run queue.\n");
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ef8b7a9..3e1959e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2637,7 +2637,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	ring = adev->vm_manager.vm_pte_rings[ring_instance];
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
 	r = amd_sched_entity_init(&ring->sched, &vm->entity,
-				  rq, amdgpu_sched_jobs);
+				  rq, amdgpu_sched_jobs, NULL);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 71299c6..f4d25a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -412,7 +412,7 @@ static int uvd_v6_0_sw_init(void *handle)
 		ring = &adev->uvd.ring_enc[0];
 		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 		r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
-					  rq, amdgpu_sched_jobs);
+					  rq, amdgpu_sched_jobs, NULL);
 		if (r) {
 			DRM_ERROR("Failed setting up UVD ENC run queue.\n");
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index b8ed8fa..8dbb409 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -418,7 +418,7 @@ static int uvd_v7_0_sw_init(void *handle)
 	ring = &adev->uvd.ring_enc[0];
 	rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 	r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity_enc,
-				  rq, amdgpu_sched_jobs);
+				  rq, amdgpu_sched_jobs, NULL);
 	if (r) {
 		DRM_ERROR("Failed setting up UVD ENC run queue.\n");
 		return r;
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index dfda486..70c9275 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -121,7 +121,7 @@ amd_sched_rq_select_entity(struct amd_sched_rq *rq)
 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 			  struct amd_sched_entity *entity,
 			  struct amd_sched_rq *rq,
-			  uint32_t jobs)
+			  uint32_t jobs, atomic_t *guilty)
 {
 	int r;
 
@@ -132,6 +132,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 	INIT_LIST_HEAD(&entity->list);
 	entity->rq = rq;
 	entity->sched = sched;
+	entity->guilty = guilty;
 
 	spin_lock_init(&entity->rq_lock);
 	spin_lock_init(&entity->queue_lock);
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 774b5da..ee79c63 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -50,6 +50,7 @@ struct amd_sched_entity {
 
 	struct dma_fence		*dependency;
 	struct dma_fence_cb		cb;
+	atomic_t	*guilty; /* points to ctx's guilty */
 };
 
 /**
@@ -153,7 +154,7 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched);
 int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 			  struct amd_sched_entity *entity,
 			  struct amd_sched_rq *rq,
-			  uint32_t jobs);
+			  uint32_t jobs, atomic_t* guilty);
 void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
 			   struct amd_sched_entity *entity);
 void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/5] drm/amdgpu:pass ctx->guilty address to entity init
       [not found] ` <1508736862-8873-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-10-23  5:34   ` [PATCH 2/5] drm/amdgpu:add hang_limit for sched(v2) Monk Liu
  2017-10-23  5:34   ` [PATCH 3/5] drm/amd/scheduler:introduce guilty pointer member Monk Liu
@ 2017-10-23  5:34   ` Monk Liu
  2017-10-23  5:34   ` [PATCH 5/5] drm/amdgpu:skip job for guilty ctx in parser_init Monk Liu
  2017-10-23  5:45   ` [PATCH 1/5] drm/amdgpu:cleanup job reset routine Liu, Monk
  4 siblings, 0 replies; 6+ messages in thread
From: Monk Liu @ 2017-10-23  5:34 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

this way the real interested guilty is connected to entity->guilty
pointer, and we can use entity->pointer later in gpu recovery procedure

Change-Id: I09b392ebfc59254795e4fbd5816abd3d94a95853
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 774edc1..6a4178b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -744,6 +744,7 @@ struct amdgpu_ctx {
 	enum amd_sched_priority init_priority;
 	enum amd_sched_priority override_priority;
 	struct mutex            lock;
+	atomic_t	guilty;
 };
 
 struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index bb5a46a..1bf4cdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -90,7 +90,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			continue;
 
 		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
-					  rq, amdgpu_sched_jobs, NULL);
+					  rq, amdgpu_sched_jobs, &ctx->guilty);
 		if (r)
 			goto failed;
 	}
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 5/5] drm/amdgpu:skip job for guilty ctx in parser_init
       [not found] ` <1508736862-8873-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-10-23  5:34   ` [PATCH 4/5] drm/amdgpu:pass ctx->guilty address to entity init Monk Liu
@ 2017-10-23  5:34   ` Monk Liu
  2017-10-23  5:45   ` [PATCH 1/5] drm/amdgpu:cleanup job reset routine Liu, Monk
  4 siblings, 0 replies; 6+ messages in thread
From: Monk Liu @ 2017-10-23  5:34 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Change-Id: I44019f6475b1eaaba55633cf5f8bb84284f19a2c
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f7fceb6..1d5cbe7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -90,6 +90,12 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_chunk;
 	}
 
+	/* skip guilty context job */
+	if (atomic_read(&p->ctx->guilty) == 1) {
+		ret = -ECANCELED;
+		goto free_chunk;
+	}
+
 	mutex_lock(&p->ctx->lock);
 
 	/* get chunks */
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/5] drm/amdgpu:cleanup job reset routine
       [not found] ` <1508736862-8873-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-10-23  5:34   ` [PATCH 5/5] drm/amdgpu:skip job for guilty ctx in parser_init Monk Liu
@ 2017-10-23  5:45   ` Liu, Monk
  4 siblings, 0 replies; 6+ messages in thread
From: Liu, Monk @ 2017-10-23  5:45 UTC (permalink / raw)
  To: Liu, Monk, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Sorry, this patch is invalide, please ignore this one, will send out v2 for review

-----Original Message-----
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of Monk Liu
Sent: 2017年10月23日 13:34
To: amd-gfx@lists.freedesktop.org
Cc: Liu, Monk <Monk.Liu@amd.com>
Subject: [PATCH 1/5] drm/amdgpu:cleanup job reset routine

merge the setting guilty on context into this function to avoid implement extra routine.

Change-Id: I7a0063464fdc85d5ac9080046380e745565ff540
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  4 ++--
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 13 ++++++++++++-  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 +-
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index dae1e5b..a07544d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2868,7 +2868,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job)
 			amd_sched_job_kickout(&job->base);
 
 		/* only do job_reset on the hang ring if @job not NULL */
-		amd_sched_hw_job_reset(&ring->sched);
+		amd_sched_hw_job_reset(&ring->sched, NULL);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
@@ -2989,7 +2989,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 		if (!ring || !ring->sched.thread)
 			continue;
 		kthread_park(ring->sched.thread);
-		amd_sched_hw_job_reset(&ring->sched);
+		amd_sched_hw_job_reset(&ring->sched, NULL);
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index e4d3b4e..322b6c1 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -442,7 +442,14 @@ static void amd_sched_job_timedout(struct work_struct *work)
 	job->sched->ops->timedout_job(job);
 }
 
-void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
+static void amd_sched_set_guilty(struct amd_sched_job *s_job) {
+	if (atomic_inc_return(&s_job->karma) > s_job->sched->hang_limit)
+		if (s_job->s_entity->guilty)
+			atomic_set(s_job->s_entity->guilty, 1); }
+
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct 
+amd_sched_job *bad)
 {
 	struct amd_sched_job *s_job;
 
@@ -455,6 +462,10 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 			s_job->s_fence->parent = NULL;
 			atomic_dec(&sched->hw_rq_count);
 		}
+
+		if (bad && bad == s_job) {
+			amd_sched_set_guilty(s_job);
+		}
 	}
 	spin_unlock(&sched->job_list_lock);
 }
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 52c8e54..d26e778 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -170,7 +170,7 @@ int amd_sched_job_init(struct amd_sched_job *job,
 		       struct amd_gpu_scheduler *sched,
 		       struct amd_sched_entity *entity,
 		       void *owner);
-void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct 
+amd_sched_job *job);
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);  bool amd_sched_dependency_optimized(struct dma_fence* fence,
 				    struct amd_sched_entity *entity);
--
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2017-10-23  5:45 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-23  5:34 [PATCH 1/5] drm/amdgpu:cleanup job reset routine Monk Liu
     [not found] ` <1508736862-8873-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-10-23  5:34   ` [PATCH 2/5] drm/amdgpu:add hang_limit for sched(v2) Monk Liu
2017-10-23  5:34   ` [PATCH 3/5] drm/amd/scheduler:introduce guilty pointer member Monk Liu
2017-10-23  5:34   ` [PATCH 4/5] drm/amdgpu:pass ctx->guilty address to entity init Monk Liu
2017-10-23  5:34   ` [PATCH 5/5] drm/amdgpu:skip job for guilty ctx in parser_init Monk Liu
2017-10-23  5:45   ` [PATCH 1/5] drm/amdgpu:cleanup job reset routine Liu, Monk

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.