All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 1/2] drm/sched: Add boolean to mark if sched is ready to work v2
@ 2018-10-19 20:52 Andrey Grodzovsky
  2018-10-19 20:52 ` [PATCH v2 2/2] drm/amdgpu: Retire amdgpu_ring.ready flag Andrey Grodzovsky
       [not found] ` <1539982364-6533-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 2 replies; 6+ messages in thread
From: Andrey Grodzovsky @ 2018-10-19 20:52 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: Alexander.Deucher, christian.koenig

Problem:
A particular scheduler may become unusable (underlying HW) after
some event (e.g. GPU reset). If it's later chosen by
the get free sched. policy a command will fail to be
submitted.

Fix:
Add a driver specific callback to report the sched status so
an rq with a bad sched can be avoided in favor of a working one,
or none, in which case job init will fail.

v2: Switch from driver callback to flag in scheduler.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
 drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  2 +-
 drivers/gpu/drm/scheduler/sched_entity.c  |  9 ++++++++-
 drivers/gpu/drm/scheduler/sched_main.c    | 10 +++++++++-
 drivers/gpu/drm/v3d/v3d_sched.c           |  4 ++--
 include/drm/gpu_scheduler.h               |  5 ++++-
 6 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 5448cf2..bf845b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -450,7 +450,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 
 		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
 				   num_hw_submission, amdgpu_job_hang_limit,
-				   timeout, ring->name);
+				   timeout, ring->name, false);
 		if (r) {
 			DRM_ERROR("Failed to create scheduler on ring %s.\n",
 				  ring->name);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index f8c5f1e..9dca347 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -178,7 +178,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
 
 	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
 			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
-			     msecs_to_jiffies(500), dev_name(gpu->dev));
+			     msecs_to_jiffies(500), dev_name(gpu->dev), true);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 3e22a54..ba54c30 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -130,7 +130,14 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
 	int i;
 
 	for (i = 0; i < entity->num_rq_list; ++i) {
-		num_jobs = atomic_read(&entity->rq_list[i]->sched->num_jobs);
+		struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
+
+		if (!entity->rq_list[i]->sched->ready) {
+			DRM_WARN("sched%s is not ready, skipping", sched->name);
+			continue;
+		}
+
+		num_jobs = atomic_read(&sched->num_jobs);
 		if (num_jobs < min_jobs) {
 			min_jobs = num_jobs;
 			rq = entity->rq_list[i];
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 63b997d..772adec 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -420,6 +420,9 @@ int drm_sched_job_init(struct drm_sched_job *job,
 	struct drm_gpu_scheduler *sched;
 
 	drm_sched_entity_select_rq(entity);
+	if (!entity->rq)
+		return -ENOENT;
+
 	sched = entity->rq->sched;
 
 	job->sched = sched;
@@ -598,6 +601,7 @@ static int drm_sched_main(void *param)
  * @hang_limit: number of times to allow a job to hang before dropping it
  * @timeout: timeout value in jiffies for the scheduler
  * @name: name used for debugging
+ * @ready: marks if the underlying HW is ready to work
  *
  * Return 0 on success, otherwise error code.
  */
@@ -606,7 +610,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   unsigned hw_submission,
 		   unsigned hang_limit,
 		   long timeout,
-		   const char *name)
+		   const char *name,
+		   bool	ready)
 {
 	int i;
 	sched->ops = ops;
@@ -633,6 +638,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 		return PTR_ERR(sched->thread);
 	}
 
+	sched->ready = ready;
 	return 0;
 }
 EXPORT_SYMBOL(drm_sched_init);
@@ -648,5 +654,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 {
 	if (sched->thread)
 		kthread_stop(sched->thread);
+
+	sched->ready = false;
 }
 EXPORT_SYMBOL(drm_sched_fini);
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 80b641f..7cedb5f 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -212,7 +212,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 			     &v3d_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
-			     "v3d_bin");
+			     "v3d_bin", true);
 	if (ret) {
 		dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
 		return ret;
@@ -222,7 +222,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 			     &v3d_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
-			     "v3d_render");
+			     "v3d_render", true);
 	if (ret) {
 		dev_err(v3d->dev, "Failed to create render scheduler: %d.",
 			ret);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 0684dcd..037caea 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -264,6 +264,7 @@ struct drm_sched_backend_ops {
  * @hang_limit: once the hangs by a job crosses this limit then it is marked
  *              guilty and it will be considered for scheduling further.
  * @num_jobs: the number of jobs in queue in the scheduler
+ * @ready: marks if the underlying HW is ready to work
  *
  * One scheduler is implemented for each hardware ring.
  */
@@ -283,12 +284,14 @@ struct drm_gpu_scheduler {
 	spinlock_t			job_list_lock;
 	int				hang_limit;
 	atomic_t                        num_jobs;
+	bool			ready;
 };
 
 int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   const struct drm_sched_backend_ops *ops,
 		   uint32_t hw_submission, unsigned hang_limit, long timeout,
-		   const char *name);
+		   const char *name,
+		   bool	ready);
 void drm_sched_fini(struct drm_gpu_scheduler *sched);
 int drm_sched_job_init(struct drm_sched_job *job,
 		       struct drm_sched_entity *entity,
-- 
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 2/2] drm/amdgpu: Retire amdgpu_ring.ready flag.
  2018-10-19 20:52 [PATCH v2 1/2] drm/sched: Add boolean to mark if sched is ready to work v2 Andrey Grodzovsky
@ 2018-10-19 20:52 ` Andrey Grodzovsky
       [not found]   ` <1539982364-6533-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
       [not found] ` <1539982364-6533-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 6+ messages in thread
From: Andrey Grodzovsky @ 2018-10-19 20:52 UTC (permalink / raw)
  To: amd-gfx, dri-devel; +Cc: Alexander.Deucher, christian.koenig

Start using drm_gpu_scheduler.ready instead.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c        |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c            |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c           | 18 ++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c            |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c          |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h          |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c           |  2 +-
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c             |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c             | 14 ++++++-------
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c             | 12 ++++++------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c             | 24 ++++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 18 ++++++++---------
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c             |  2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c            |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c            |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c            | 14 ++++++-------
 drivers/gpu/drm/amd/amdgpu/si_dma.c               |  6 +++---
 drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c             |  6 +++---
 drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c             |  6 +++---
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c             | 10 +++++-----
 drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c             | 10 +++++-----
 drivers/gpu/drm/amd/amdgpu/vce_v2_0.c             |  4 ++--
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c             |  4 ++--
 drivers/gpu/drm/amd/amdgpu/vce_v4_0.c             |  6 +++---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c             | 14 ++++++-------
 26 files changed, 105 insertions(+), 104 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c31a884..eaa58bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -144,7 +144,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 				  KGD_MAX_QUEUES);
 
 		/* remove the KIQ bit as well */
-		if (adev->gfx.kiq.ring.ready)
+		if (adev->gfx.kiq.ring.sched.ready)
 			clear_bit(amdgpu_gfx_queue_to_bit(adev,
 							  adev->gfx.kiq.ring.me - 1,
 							  adev->gfx.kiq.ring.pipe,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 42cb4c4..f7819a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -876,7 +876,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
 	if (adev->in_gpu_reset)
 		return -EIO;
 
-	if (ring->ready)
+	if (ring->sched.ready)
 		return invalidate_tlbs_with_kiq(adev, pasid);
 
 	for (vmid = 0; vmid < 16; vmid++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b8963b7..fc74f40a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -146,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		fence_ctx = 0;
 	}
 
-	if (!ring->ready) {
+	if (!ring->sched.ready) {
 		dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
 		return -EINVAL;
 	}
@@ -351,7 +351,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		struct amdgpu_ring *ring = adev->rings[i];
 		long tmo;
 
-		if (!ring || !ring->ready)
+		if (!ring || !ring->sched.ready)
 			continue;
 
 		/* skip IB tests for KIQ in general for the below reasons:
@@ -375,7 +375,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 
 		r = amdgpu_ring_test_ib(ring, tmo);
 		if (r) {
-			ring->ready = false;
+			ring->sched.ready = false;
 
 			if (ring == &adev->gfx.gfx_ring[0]) {
 				/* oh, oh, that's really bad */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 50ece76..25307a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -336,7 +336,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	case AMDGPU_HW_IP_GFX:
 		type = AMD_IP_BLOCK_TYPE_GFX;
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-			if (adev->gfx.gfx_ring[i].ready)
+			if (adev->gfx.gfx_ring[i].sched.ready)
 				++num_rings;
 		ib_start_alignment = 32;
 		ib_size_alignment = 32;
@@ -344,7 +344,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	case AMDGPU_HW_IP_COMPUTE:
 		type = AMD_IP_BLOCK_TYPE_GFX;
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
-			if (adev->gfx.compute_ring[i].ready)
+			if (adev->gfx.compute_ring[i].sched.ready)
 				++num_rings;
 		ib_start_alignment = 32;
 		ib_size_alignment = 32;
@@ -352,7 +352,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	case AMDGPU_HW_IP_DMA:
 		type = AMD_IP_BLOCK_TYPE_SDMA;
 		for (i = 0; i < adev->sdma.num_instances; i++)
-			if (adev->sdma.instance[i].ring.ready)
+			if (adev->sdma.instance[i].ring.sched.ready)
 				++num_rings;
 		ib_start_alignment = 256;
 		ib_size_alignment = 4;
@@ -363,7 +363,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 			if (adev->uvd.harvest_config & (1 << i))
 				continue;
 
-			if (adev->uvd.inst[i].ring.ready)
+			if (adev->uvd.inst[i].ring.sched.ready)
 				++num_rings;
 		}
 		ib_start_alignment = 64;
@@ -372,7 +372,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	case AMDGPU_HW_IP_VCE:
 		type = AMD_IP_BLOCK_TYPE_VCE;
 		for (i = 0; i < adev->vce.num_rings; i++)
-			if (adev->vce.ring[i].ready)
+			if (adev->vce.ring[i].sched.ready)
 				++num_rings;
 		ib_start_alignment = 4;
 		ib_size_alignment = 1;
@@ -384,7 +384,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 				continue;
 
 			for (j = 0; j < adev->uvd.num_enc_rings; j++)
-				if (adev->uvd.inst[i].ring_enc[j].ready)
+				if (adev->uvd.inst[i].ring_enc[j].sched.ready)
 					++num_rings;
 		}
 		ib_start_alignment = 64;
@@ -392,7 +392,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 		break;
 	case AMDGPU_HW_IP_VCN_DEC:
 		type = AMD_IP_BLOCK_TYPE_VCN;
-		if (adev->vcn.ring_dec.ready)
+		if (adev->vcn.ring_dec.sched.ready)
 			++num_rings;
 		ib_start_alignment = 16;
 		ib_size_alignment = 16;
@@ -400,14 +400,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	case AMDGPU_HW_IP_VCN_ENC:
 		type = AMD_IP_BLOCK_TYPE_VCN;
 		for (i = 0; i < adev->vcn.num_enc_rings; i++)
-			if (adev->vcn.ring_enc[i].ready)
+			if (adev->vcn.ring_enc[i].sched.ready)
 				++num_rings;
 		ib_start_alignment = 64;
 		ib_size_alignment = 1;
 		break;
 	case AMDGPU_HW_IP_VCN_JPEG:
 		type = AMD_IP_BLOCK_TYPE_VCN;
-		if (adev->vcn.ring_jpeg.ready)
+		if (adev->vcn.ring_jpeg.sched.ready)
 			++num_rings;
 		ib_start_alignment = 16;
 		ib_size_alignment = 16;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 59cc678..7235cd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -2129,7 +2129,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
-		if (ring && ring->ready)
+		if (ring && ring->sched.ready)
 			amdgpu_fence_wait_empty(ring);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index b70e85e..d38d445 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -338,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
  */
 void amdgpu_ring_fini(struct amdgpu_ring *ring)
 {
-	ring->ready = false;
+	ring->sched.ready = false;
 
 	/* Not to finish a ring which is not initialized */
 	if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4caa301..569dfea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -189,7 +189,6 @@ struct amdgpu_ring {
 	uint64_t		gpu_addr;
 	uint64_t		ptr_mask;
 	uint32_t		buf_mask;
-	bool			ready;
 	u32			idx;
 	u32			me;
 	u32			pipe;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3a68028..d76895c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2069,7 +2069,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	unsigned i;
 	int r;
 
-	if (direct_submit && !ring->ready) {
+	if (direct_submit && !ring->sched.ready) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 32eb43d..9562935 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -316,8 +316,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
 	}
-	sdma0->ready = false;
-	sdma1->ready = false;
+	sdma0->sched.ready = false;
+	sdma1->sched.ready = false;
 }
 
 /**
@@ -494,7 +494,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		/* enable DMA IBs */
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
-		ring->ready = true;
+		ring->sched.ready = true;
 	}
 
 	cik_sdma_enable(adev, true);
@@ -503,7 +503,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
-			ring->ready = false;
+			ring->sched.ready = false;
 			return r;
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 622dd70..98ffdb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1950,9 +1950,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 				      CP_ME_CNTL__CE_HALT_MASK));
 		WREG32(mmSCRATCH_UMSK, 0);
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-			adev->gfx.gfx_ring[i].ready = false;
+			adev->gfx.gfx_ring[i].sched.ready = false;
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
-			adev->gfx.compute_ring[i].ready = false;
+			adev->gfx.compute_ring[i].sched.ready = false;
 	}
 	udelay(50);
 }
@@ -2124,10 +2124,10 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
 
 	/* start the rings */
 	gfx_v6_0_cp_gfx_start(adev);
-	ring->ready = true;
+	ring->sched.ready = true;
 	r = amdgpu_ring_test_ring(ring);
 	if (r) {
-		ring->ready = false;
+		ring->sched.ready = false;
 		return r;
 	}
 
@@ -2227,14 +2227,14 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
 	WREG32(mmCP_RB2_CNTL, tmp);
 	WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8);
 
-	adev->gfx.compute_ring[0].ready = false;
-	adev->gfx.compute_ring[1].ready = false;
+	adev->gfx.compute_ring[0].sched.ready = false;
+	adev->gfx.compute_ring[1].sched.ready = false;
 
 	for (i = 0; i < 2; i++) {
 		r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]);
 		if (r)
 			return r;
-		adev->gfx.compute_ring[i].ready = true;
+		adev->gfx.compute_ring[i].sched.ready = true;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 9fadb32..5558075 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2403,7 +2403,7 @@ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 	} else {
 		WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-			adev->gfx.gfx_ring[i].ready = false;
+			adev->gfx.gfx_ring[i].sched.ready = false;
 	}
 	udelay(50);
 }
@@ -2613,10 +2613,10 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
 
 	/* start the ring */
 	gfx_v7_0_cp_gfx_start(adev);
-	ring->ready = true;
+	ring->sched.ready = true;
 	r = amdgpu_ring_test_ring(ring);
 	if (r) {
-		ring->ready = false;
+		ring->sched.ready = false;
 		return r;
 	}
 
@@ -2675,7 +2675,7 @@ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	} else {
 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
-			adev->gfx.compute_ring[i].ready = false;
+			adev->gfx.compute_ring[i].sched.ready = false;
 	}
 	udelay(50);
 }
@@ -3106,10 +3106,10 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
-		ring->ready = true;
+		ring->sched.ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
-			ring->ready = false;
+			ring->sched.ready = false;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 4e6d31f..6869d9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1629,7 +1629,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 		return 0;
 
 	/* bail if the compute ring is not ready */
-	if (!ring->ready)
+	if (!ring->sched.ready)
 		return 0;
 
 	tmp = RREG32(mmGB_EDC_MODE);
@@ -4197,7 +4197,7 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-			adev->gfx.gfx_ring[i].ready = false;
+			adev->gfx.gfx_ring[i].sched.ready = false;
 	}
 	WREG32(mmCP_ME_CNTL, tmp);
 	udelay(50);
@@ -4379,10 +4379,10 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 	/* start the ring */
 	amdgpu_ring_clear_ring(ring);
 	gfx_v8_0_cp_gfx_start(adev);
-	ring->ready = true;
+	ring->sched.ready = true;
 	r = amdgpu_ring_test_ring(ring);
 	if (r)
-		ring->ready = false;
+		ring->sched.ready = false;
 
 	return r;
 }
@@ -4396,8 +4396,8 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	} else {
 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
-			adev->gfx.compute_ring[i].ready = false;
-		adev->gfx.kiq.ring.ready = false;
+			adev->gfx.compute_ring[i].sched.ready = false;
+		adev->gfx.kiq.ring.sched.ready = false;
 	}
 	udelay(50);
 }
@@ -4476,7 +4476,7 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
 	r = amdgpu_ring_test_ring(kiq_ring);
 	if (r) {
 		DRM_ERROR("KCQ enable failed\n");
-		kiq_ring->ready = false;
+		kiq_ring->sched.ready = false;
 	}
 	return r;
 }
@@ -4781,7 +4781,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 	amdgpu_bo_kunmap(ring->mqd_obj);
 	ring->mqd_ptr = NULL;
 	amdgpu_bo_unreserve(ring->mqd_obj);
-	ring->ready = true;
+	ring->sched.ready = true;
 	return 0;
 }
 
@@ -4818,10 +4818,12 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
 	/* Test KCQs */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
-		ring->ready = true;
+		ring->sched.ready = true;
 		r = amdgpu_ring_test_ring(ring);
-		if (r)
-			ring->ready = false;
+		if (r) {
+			ring->sched.ready = false;
+			DRM_ERROR("%d", ring->idx);
+		}
 	}
 
 done:
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0ce1e14..76839d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2537,7 +2537,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
 	if (!enable) {
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
-			adev->gfx.gfx_ring[i].ready = false;
+			adev->gfx.gfx_ring[i].sched.ready = false;
 	}
 	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
 	udelay(50);
@@ -2727,7 +2727,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
 
 	/* start the ring */
 	gfx_v9_0_cp_gfx_start(adev);
-	ring->ready = true;
+	ring->sched.ready = true;
 
 	return 0;
 }
@@ -2742,8 +2742,8 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
-			adev->gfx.compute_ring[i].ready = false;
-		adev->gfx.kiq.ring.ready = false;
+			adev->gfx.compute_ring[i].sched.ready = false;
+		adev->gfx.kiq.ring.sched.ready = false;
 	}
 	udelay(50);
 }
@@ -2869,7 +2869,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
 	r = amdgpu_ring_test_ring(kiq_ring);
 	if (r) {
 		DRM_ERROR("KCQ enable failed\n");
-		kiq_ring->ready = false;
+		kiq_ring->sched.ready = false;
 	}
 
 	return r;
@@ -3249,7 +3249,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
 	amdgpu_bo_kunmap(ring->mqd_obj);
 	ring->mqd_ptr = NULL;
 	amdgpu_bo_unreserve(ring->mqd_obj);
-	ring->ready = true;
+	ring->sched.ready = true;
 	return 0;
 }
 
@@ -3316,17 +3316,17 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 	ring = &adev->gfx.gfx_ring[0];
 	r = amdgpu_ring_test_ring(ring);
 	if (r) {
-		ring->ready = false;
+		ring->sched.ready = false;
 		return r;
 	}
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
 
-		ring->ready = true;
+		ring->sched.ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
-			ring->ready = false;
+			ring->sched.ready = false;
 	}
 
 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f35d7a5..56fd3d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -381,7 +381,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 		struct amdgpu_vmhub *hub = &adev->vmhub[i];
 		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
 
-		if (adev->gfx.kiq.ring.ready &&
+		if (adev->gfx.kiq.ring.sched.ready &&
 		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
 		    !adev->in_gpu_reset) {
 			r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index bedbd5f..b00631c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -349,8 +349,8 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 	}
-	sdma0->ready = false;
-	sdma1->ready = false;
+	sdma0->sched.ready = false;
+	sdma1->sched.ready = false;
 }
 
 /**
@@ -471,7 +471,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
 		/* enable DMA IBs */
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
-		ring->ready = true;
+		ring->sched.ready = true;
 	}
 
 	sdma_v2_4_enable(adev, true);
@@ -479,7 +479,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
 		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
-			ring->ready = false;
+			ring->sched.ready = false;
 			return r;
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 415968d..533e130 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -523,8 +523,8 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 	}
-	sdma0->ready = false;
-	sdma1->ready = false;
+	sdma0->sched.ready = false;
+	sdma1->sched.ready = false;
 }
 
 /**
@@ -739,7 +739,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		/* enable DMA IBs */
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
-		ring->ready = true;
+		ring->sched.ready = true;
 	}
 
 	/* unhalt the MEs */
@@ -751,7 +751,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
-			ring->ready = false;
+			ring->sched.ready = false;
 			return r;
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 6ad4fda..031ed72 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -634,8 +634,8 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
 		WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
 	}
 
-	sdma0->ready = false;
-	sdma1->ready = false;
+	sdma0->sched.ready = false;
+	sdma1->sched.ready = false;
 }
 
 /**
@@ -675,8 +675,8 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
 		WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
 	}
 
-	sdma0->ready = false;
-	sdma1->ready = false;
+	sdma0->sched.ready = false;
+	sdma1->sched.ready = false;
 }
 
 /**
@@ -863,7 +863,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	/* enable DMA IBs */
 	WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
 
-	ring->ready = true;
+	ring->sched.ready = true;
 }
 
 /**
@@ -956,7 +956,7 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
 	/* enable DMA IBs */
 	WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
 
-	ring->ready = true;
+	ring->sched.ready = true;
 }
 
 static void
@@ -1146,7 +1146,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
 
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
-			ring->ready = false;
+			ring->sched.ready = false;
 			return r;
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index d9b27d7..ad28567 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -122,7 +122,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
 
 		if (adev->mman.buffer_funcs_ring == ring)
 			amdgpu_ttm_set_buffer_funcs_status(adev, false);
-		ring->ready = false;
+		ring->sched.ready = false;
 	}
 }
 
@@ -175,11 +175,11 @@ static int si_dma_start(struct amdgpu_device *adev)
 		WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
 		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
 
-		ring->ready = true;
+		ring->sched.ready = true;
 
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
-			ring->ready = false;
+			ring->sched.ready = false;
 			return r;
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 1fc17bf..12bf064 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -162,10 +162,10 @@ static int uvd_v4_2_hw_init(void *handle)
 	uvd_v4_2_enable_mgcg(adev, true);
 	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
 
-	ring->ready = true;
+	ring->sched.ready = true;
 	r = amdgpu_ring_test_ring(ring);
 	if (r) {
-		ring->ready = false;
+		ring->sched.ready = false;
 		goto done;
 	}
 
@@ -218,7 +218,7 @@ static int uvd_v4_2_hw_fini(void *handle)
 	if (RREG32(mmUVD_STATUS) != 0)
 		uvd_v4_2_stop(adev);
 
-	ring->ready = false;
+	ring->sched.ready = false;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index fde6ad5..ca1b332 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -158,10 +158,10 @@ static int uvd_v5_0_hw_init(void *handle)
 	uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
 	uvd_v5_0_enable_mgcg(adev, true);
 
-	ring->ready = true;
+	ring->sched.ready = true;
 	r = amdgpu_ring_test_ring(ring);
 	if (r) {
-		ring->ready = false;
+		ring->sched.ready = false;
 		goto done;
 	}
 
@@ -215,7 +215,7 @@ static int uvd_v5_0_hw_fini(void *handle)
 	if (RREG32(mmUVD_STATUS) != 0)
 		uvd_v5_0_stop(adev);
 
-	ring->ready = false;
+	ring->sched.ready = false;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 7a5b402..5725101 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -476,10 +476,10 @@ static int uvd_v6_0_hw_init(void *handle)
 	uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
 	uvd_v6_0_enable_mgcg(adev, true);
 
-	ring->ready = true;
+	ring->sched.ready = true;
 	r = amdgpu_ring_test_ring(ring);
 	if (r) {
-		ring->ready = false;
+		ring->sched.ready = false;
 		goto done;
 	}
 
@@ -513,10 +513,10 @@ static int uvd_v6_0_hw_init(void *handle)
 	if (uvd_v6_0_enc_support(adev)) {
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 			ring = &adev->uvd.inst->ring_enc[i];
-			ring->ready = true;
+			ring->sched.ready = true;
 			r = amdgpu_ring_test_ring(ring);
 			if (r) {
-				ring->ready = false;
+				ring->sched.ready = false;
 				goto done;
 			}
 		}
@@ -548,7 +548,7 @@ static int uvd_v6_0_hw_fini(void *handle)
 	if (RREG32(mmUVD_STATUS) != 0)
 		uvd_v6_0_stop(adev);
 
-	ring->ready = false;
+	ring->sched.ready = false;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 58b39af..5edc317 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -540,10 +540,10 @@ static int uvd_v7_0_hw_init(void *handle)
 		ring = &adev->uvd.inst[j].ring;
 
 		if (!amdgpu_sriov_vf(adev)) {
-			ring->ready = true;
+			ring->sched.ready = true;
 			r = amdgpu_ring_test_ring(ring);
 			if (r) {
-				ring->ready = false;
+				ring->sched.ready = false;
 				goto done;
 			}
 
@@ -582,10 +582,10 @@ static int uvd_v7_0_hw_init(void *handle)
 
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 			ring = &adev->uvd.inst[j].ring_enc[i];
-			ring->ready = true;
+			ring->sched.ready = true;
 			r = amdgpu_ring_test_ring(ring);
 			if (r) {
-				ring->ready = false;
+				ring->sched.ready = false;
 				goto done;
 			}
 		}
@@ -619,7 +619,7 @@ static int uvd_v7_0_hw_fini(void *handle)
 	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
 		if (adev->uvd.harvest_config & (1 << i))
 			continue;
-		adev->uvd.inst[i].ring.ready = false;
+		adev->uvd.inst[i].ring.sched.ready = false;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index ea28828..dcbba0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -464,14 +464,14 @@ static int vce_v2_0_hw_init(void *handle)
 	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
 	vce_v2_0_enable_mgcg(adev, true, false);
 	for (i = 0; i < adev->vce.num_rings; i++)
-		adev->vce.ring[i].ready = false;
+		adev->vce.ring[i].sched.ready = false;
 
 	for (i = 0; i < adev->vce.num_rings; i++) {
 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
 		if (r)
 			return r;
 		else
-			adev->vce.ring[i].ready = true;
+			adev->vce.ring[i].sched.ready = true;
 	}
 
 	DRM_INFO("VCE initialized successfully.\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 6dbd397..ef17f9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -475,14 +475,14 @@ static int vce_v3_0_hw_init(void *handle)
 	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
 
 	for (i = 0; i < adev->vce.num_rings; i++)
-		adev->vce.ring[i].ready = false;
+		adev->vce.ring[i].sched.ready = false;
 
 	for (i = 0; i < adev->vce.num_rings; i++) {
 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
 		if (r)
 			return r;
 		else
-			adev->vce.ring[i].ready = true;
+			adev->vce.ring[i].sched.ready = true;
 	}
 
 	DRM_INFO("VCE initialized successfully.\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 1c94718..742950a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -520,14 +520,14 @@ static int vce_v4_0_hw_init(void *handle)
 		return r;
 
 	for (i = 0; i < adev->vce.num_rings; i++)
-		adev->vce.ring[i].ready = false;
+		adev->vce.ring[i].sched.ready = false;
 
 	for (i = 0; i < adev->vce.num_rings; i++) {
 		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
 		if (r)
 			return r;
 		else
-			adev->vce.ring[i].ready = true;
+			adev->vce.ring[i].sched.ready = true;
 	}
 
 	DRM_INFO("VCE initialized successfully.\n");
@@ -549,7 +549,7 @@ static int vce_v4_0_hw_fini(void *handle)
 	}
 
 	for (i = 0; i < adev->vce.num_rings; i++)
-		adev->vce.ring[i].ready = false;
+		adev->vce.ring[i].sched.ready = false;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index eae9092..f1650da 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -176,28 +176,28 @@ static int vcn_v1_0_hw_init(void *handle)
 	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
 	int i, r;
 
-	ring->ready = true;
+	ring->sched.ready = true;
 	r = amdgpu_ring_test_ring(ring);
 	if (r) {
-		ring->ready = false;
+		ring->sched.ready = false;
 		goto done;
 	}
 
 	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 		ring = &adev->vcn.ring_enc[i];
-		ring->ready = true;
+		ring->sched.ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
-			ring->ready = false;
+			ring->sched.ready = false;
 			goto done;
 		}
 	}
 
 	ring = &adev->vcn.ring_jpeg;
-	ring->ready = true;
+	ring->sched.ready = true;
 	r = amdgpu_ring_test_ring(ring);
 	if (r) {
-		ring->ready = false;
+		ring->sched.ready = false;
 		goto done;
 	}
 
@@ -224,7 +224,7 @@ static int vcn_v1_0_hw_fini(void *handle)
 	if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
 		vcn_v1_0_stop(adev);
 
-	ring->ready = false;
+	ring->sched.ready = false;
 
 	return 0;
 }
-- 
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 1/2] drm/sched: Add boolean to mark if sched is ready to work v2
       [not found] ` <1539982364-6533-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-22  9:33   ` Koenig, Christian
       [not found]     ` <1eed2f47-90da-e518-1c14-3ad5156044a3-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Koenig, Christian @ 2018-10-22  9:33 UTC (permalink / raw)
  To: Grodzovsky, Andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Deucher, Alexander

Am 19.10.18 um 22:52 schrieb Andrey Grodzovsky:
> Problem:
> A particular scheduler may become unusable (underlying HW) after
> some event (e.g. GPU reset). If it's later chosen by
> the get free sched. policy a command will fail to be
> submitted.
>
> Fix:
> Add a driver specific callback to report the sched status so
> rq with bad sched can be avoided in favor of working one or
> none in which case job init will fail.
>
> v2: Switch from driver callback to flag in scheduler.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
>   drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  2 +-
>   drivers/gpu/drm/scheduler/sched_entity.c  |  9 ++++++++-
>   drivers/gpu/drm/scheduler/sched_main.c    | 10 +++++++++-
>   drivers/gpu/drm/v3d/v3d_sched.c           |  4 ++--
>   include/drm/gpu_scheduler.h               |  5 ++++-
>   6 files changed, 25 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 5448cf2..bf845b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -450,7 +450,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>   
>   		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>   				   num_hw_submission, amdgpu_job_hang_limit,
> -				   timeout, ring->name);
> +				   timeout, ring->name, false);
>   		if (r) {
>   			DRM_ERROR("Failed to create scheduler on ring %s.\n",
>   				  ring->name);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> index f8c5f1e..9dca347 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> @@ -178,7 +178,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>   
>   	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>   			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
> -			     msecs_to_jiffies(500), dev_name(gpu->dev));
> +			     msecs_to_jiffies(500), dev_name(gpu->dev), true);
>   	if (ret)
>   		return ret;
>   
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 3e22a54..ba54c30 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -130,7 +130,14 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
>   	int i;
>   
>   	for (i = 0; i < entity->num_rq_list; ++i) {
> -		num_jobs = atomic_read(&entity->rq_list[i]->sched->num_jobs);
> +		struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
> +
> +		if (!entity->rq_list[i]->sched->ready) {
> +			DRM_WARN("sched%s is not ready, skipping", sched->name);
> +			continue;
> +		}
> +
> +		num_jobs = atomic_read(&sched->num_jobs);
>   		if (num_jobs < min_jobs) {
>   			min_jobs = num_jobs;
>   			rq = entity->rq_list[i];
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 63b997d..772adec 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -420,6 +420,9 @@ int drm_sched_job_init(struct drm_sched_job *job,
>   	struct drm_gpu_scheduler *sched;
>   
>   	drm_sched_entity_select_rq(entity);
> +	if (!entity->rq)
> +		return -ENOENT;
> +
>   	sched = entity->rq->sched;
>   
>   	job->sched = sched;
> @@ -598,6 +601,7 @@ static int drm_sched_main(void *param)
>    * @hang_limit: number of times to allow a job to hang before dropping it
>    * @timeout: timeout value in jiffies for the scheduler
>    * @name: name used for debugging
> + * @ready: marks if the underlying HW is ready to work
>    *
>    * Return 0 on success, otherwise error code.
>    */
> @@ -606,7 +610,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>   		   unsigned hw_submission,
>   		   unsigned hang_limit,
>   		   long timeout,
> -		   const char *name)
> +		   const char *name,
> +		   bool	ready)

Please drop the ready flag here. We should consider a scheduler ready as 
soon as it is initialized.

Apart from that looks good to me,
Christian.

>   {
>   	int i;
>   	sched->ops = ops;
> @@ -633,6 +638,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>   		return PTR_ERR(sched->thread);
>   	}
>   
> +	sched->ready = ready;
>   	return 0;
>   }
>   EXPORT_SYMBOL(drm_sched_init);
> @@ -648,5 +654,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
>   {
>   	if (sched->thread)
>   		kthread_stop(sched->thread);
> +
> +	sched->ready = false;
>   }
>   EXPORT_SYMBOL(drm_sched_fini);
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 80b641f..7cedb5f 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -212,7 +212,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>   			     &v3d_sched_ops,
>   			     hw_jobs_limit, job_hang_limit,
>   			     msecs_to_jiffies(hang_limit_ms),
> -			     "v3d_bin");
> +			     "v3d_bin", true);
>   	if (ret) {
>   		dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
>   		return ret;
> @@ -222,7 +222,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>   			     &v3d_sched_ops,
>   			     hw_jobs_limit, job_hang_limit,
>   			     msecs_to_jiffies(hang_limit_ms),
> -			     "v3d_render");
> +			     "v3d_render", true);
>   	if (ret) {
>   		dev_err(v3d->dev, "Failed to create render scheduler: %d.",
>   			ret);
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 0684dcd..037caea 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -264,6 +264,7 @@ struct drm_sched_backend_ops {
>    * @hang_limit: once the hangs by a job crosses this limit then it is marked
>    *              guilty and it will be considered for scheduling further.
>    * @num_jobs: the number of jobs in queue in the scheduler
> + * @ready: marks if the underlying HW is ready to work
>    *
>    * One scheduler is implemented for each hardware ring.
>    */
> @@ -283,12 +284,14 @@ struct drm_gpu_scheduler {
>   	spinlock_t			job_list_lock;
>   	int				hang_limit;
>   	atomic_t                        num_jobs;
> +	bool			ready;
>   };
>   
>   int drm_sched_init(struct drm_gpu_scheduler *sched,
>   		   const struct drm_sched_backend_ops *ops,
>   		   uint32_t hw_submission, unsigned hang_limit, long timeout,
> -		   const char *name);
> +		   const char *name,
> +		   bool	ready);
>   void drm_sched_fini(struct drm_gpu_scheduler *sched);
>   int drm_sched_job_init(struct drm_sched_job *job,
>   		       struct drm_sched_entity *entity,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 2/2] drm/amdgpu: Retire amdgpu_ring.ready flag.
       [not found]   ` <1539982364-6533-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-22  9:40     ` Koenig, Christian
  0 siblings, 0 replies; 6+ messages in thread
From: Koenig, Christian @ 2018-10-22  9:40 UTC (permalink / raw)
  To: Grodzovsky, Andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Deucher, Alexander

Am 19.10.18 um 22:52 schrieb Andrey Grodzovsky:
> Start using drm_gpu_scheduler.ready instead.

Please drop all occurrences of setting sched.ready manually around the 
ring tests.

Instead add a helper function into amdgpu_ring.c which does the ring 
tests and sets ready depending on the result.

Regards,
Christian.

>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c        |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c            |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c           | 18 ++++++++---------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c            |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c          |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h          |  1 -
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c           |  2 +-
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c             |  8 ++++----
>   drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c             | 14 ++++++-------
>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c             | 12 ++++++------
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c             | 24 ++++++++++++-----------
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c             | 18 ++++++++---------
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c             |  2 +-
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c            |  8 ++++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c            |  8 ++++----
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c            | 14 ++++++-------
>   drivers/gpu/drm/amd/amdgpu/si_dma.c               |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c             |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c             |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c             | 10 +++++-----
>   drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c             | 10 +++++-----
>   drivers/gpu/drm/amd/amdgpu/vce_v2_0.c             |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/vce_v3_0.c             |  4 ++--
>   drivers/gpu/drm/amd/amdgpu/vce_v4_0.c             |  6 +++---
>   drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c             | 14 ++++++-------
>   26 files changed, 105 insertions(+), 104 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index c31a884..eaa58bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -144,7 +144,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   				  KGD_MAX_QUEUES);
>   
>   		/* remove the KIQ bit as well */
> -		if (adev->gfx.kiq.ring.ready)
> +		if (adev->gfx.kiq.ring.sched.ready)
>   			clear_bit(amdgpu_gfx_queue_to_bit(adev,
>   							  adev->gfx.kiq.ring.me - 1,
>   							  adev->gfx.kiq.ring.pipe,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> index 42cb4c4..f7819a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
> @@ -876,7 +876,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
>   	if (adev->in_gpu_reset)
>   		return -EIO;
>   
> -	if (ring->ready)
> +	if (ring->sched.ready)
>   		return invalidate_tlbs_with_kiq(adev, pasid);
>   
>   	for (vmid = 0; vmid < 16; vmid++) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index b8963b7..fc74f40a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -146,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		fence_ctx = 0;
>   	}
>   
> -	if (!ring->ready) {
> +	if (!ring->sched.ready) {
>   		dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name);
>   		return -EINVAL;
>   	}
> @@ -351,7 +351,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
>   		struct amdgpu_ring *ring = adev->rings[i];
>   		long tmo;
>   
> -		if (!ring || !ring->ready)
> +		if (!ring || !ring->sched.ready)
>   			continue;
>   
>   		/* skip IB tests for KIQ in general for the below reasons:
> @@ -375,7 +375,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
>   
>   		r = amdgpu_ring_test_ib(ring, tmo);
>   		if (r) {
> -			ring->ready = false;
> +			ring->sched.ready = false;
>   
>   			if (ring == &adev->gfx.gfx_ring[0]) {
>   				/* oh, oh, that's really bad */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 50ece76..25307a4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -336,7 +336,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   	case AMDGPU_HW_IP_GFX:
>   		type = AMD_IP_BLOCK_TYPE_GFX;
>   		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -			if (adev->gfx.gfx_ring[i].ready)
> +			if (adev->gfx.gfx_ring[i].sched.ready)
>   				++num_rings;
>   		ib_start_alignment = 32;
>   		ib_size_alignment = 32;
> @@ -344,7 +344,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   	case AMDGPU_HW_IP_COMPUTE:
>   		type = AMD_IP_BLOCK_TYPE_GFX;
>   		for (i = 0; i < adev->gfx.num_compute_rings; i++)
> -			if (adev->gfx.compute_ring[i].ready)
> +			if (adev->gfx.compute_ring[i].sched.ready)
>   				++num_rings;
>   		ib_start_alignment = 32;
>   		ib_size_alignment = 32;
> @@ -352,7 +352,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   	case AMDGPU_HW_IP_DMA:
>   		type = AMD_IP_BLOCK_TYPE_SDMA;
>   		for (i = 0; i < adev->sdma.num_instances; i++)
> -			if (adev->sdma.instance[i].ring.ready)
> +			if (adev->sdma.instance[i].ring.sched.ready)
>   				++num_rings;
>   		ib_start_alignment = 256;
>   		ib_size_alignment = 4;
> @@ -363,7 +363,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   			if (adev->uvd.harvest_config & (1 << i))
>   				continue;
>   
> -			if (adev->uvd.inst[i].ring.ready)
> +			if (adev->uvd.inst[i].ring.sched.ready)
>   				++num_rings;
>   		}
>   		ib_start_alignment = 64;
> @@ -372,7 +372,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   	case AMDGPU_HW_IP_VCE:
>   		type = AMD_IP_BLOCK_TYPE_VCE;
>   		for (i = 0; i < adev->vce.num_rings; i++)
> -			if (adev->vce.ring[i].ready)
> +			if (adev->vce.ring[i].sched.ready)
>   				++num_rings;
>   		ib_start_alignment = 4;
>   		ib_size_alignment = 1;
> @@ -384,7 +384,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   				continue;
>   
>   			for (j = 0; j < adev->uvd.num_enc_rings; j++)
> -				if (adev->uvd.inst[i].ring_enc[j].ready)
> +				if (adev->uvd.inst[i].ring_enc[j].sched.ready)
>   					++num_rings;
>   		}
>   		ib_start_alignment = 64;
> @@ -392,7 +392,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   		break;
>   	case AMDGPU_HW_IP_VCN_DEC:
>   		type = AMD_IP_BLOCK_TYPE_VCN;
> -		if (adev->vcn.ring_dec.ready)
> +		if (adev->vcn.ring_dec.sched.ready)
>   			++num_rings;
>   		ib_start_alignment = 16;
>   		ib_size_alignment = 16;
> @@ -400,14 +400,14 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
>   	case AMDGPU_HW_IP_VCN_ENC:
>   		type = AMD_IP_BLOCK_TYPE_VCN;
>   		for (i = 0; i < adev->vcn.num_enc_rings; i++)
> -			if (adev->vcn.ring_enc[i].ready)
> +			if (adev->vcn.ring_enc[i].sched.ready)
>   				++num_rings;
>   		ib_start_alignment = 64;
>   		ib_size_alignment = 1;
>   		break;
>   	case AMDGPU_HW_IP_VCN_JPEG:
>   		type = AMD_IP_BLOCK_TYPE_VCN;
> -		if (adev->vcn.ring_jpeg.ready)
> +		if (adev->vcn.ring_jpeg.sched.ready)
>   			++num_rings;
>   		ib_start_alignment = 16;
>   		ib_size_alignment = 16;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index 59cc678..7235cd0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -2129,7 +2129,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
>   
>   	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
>   		struct amdgpu_ring *ring = adev->rings[i];
> -		if (ring && ring->ready)
> +		if (ring && ring->sched.ready)
>   			amdgpu_fence_wait_empty(ring);
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index b70e85e..d38d445 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -338,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>    */
>   void amdgpu_ring_fini(struct amdgpu_ring *ring)
>   {
> -	ring->ready = false;
> +	ring->sched.ready = false;
>   
>   	/* Not to finish a ring which is not initialized */
>   	if (!(ring->adev) || !(ring->adev->rings[ring->idx]))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 4caa301..569dfea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -189,7 +189,6 @@ struct amdgpu_ring {
>   	uint64_t		gpu_addr;
>   	uint64_t		ptr_mask;
>   	uint32_t		buf_mask;
> -	bool			ready;
>   	u32			idx;
>   	u32			me;
>   	u32			pipe;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 3a68028..d76895c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -2069,7 +2069,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
>   	unsigned i;
>   	int r;
>   
> -	if (direct_submit && !ring->ready) {
> +	if (direct_submit && !ring->sched.ready) {
>   		DRM_ERROR("Trying to move memory with ring turned off.\n");
>   		return -EINVAL;
>   	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index 32eb43d..9562935 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -316,8 +316,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
>   		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
>   		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0);
>   	}
> -	sdma0->ready = false;
> -	sdma1->ready = false;
> +	sdma0->sched.ready = false;
> +	sdma1->sched.ready = false;
>   }
>   
>   /**
> @@ -494,7 +494,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
>   		/* enable DMA IBs */
>   		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
>   
> -		ring->ready = true;
> +		ring->sched.ready = true;
>   	}
>   
>   	cik_sdma_enable(adev, true);
> @@ -503,7 +503,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
>   		ring = &adev->sdma.instance[i].ring;
>   		r = amdgpu_ring_test_ring(ring);
>   		if (r) {
> -			ring->ready = false;
> +			ring->sched.ready = false;
>   			return r;
>   		}
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> index 622dd70..98ffdb9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
> @@ -1950,9 +1950,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
>   				      CP_ME_CNTL__CE_HALT_MASK));
>   		WREG32(mmSCRATCH_UMSK, 0);
>   		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -			adev->gfx.gfx_ring[i].ready = false;
> +			adev->gfx.gfx_ring[i].sched.ready = false;
>   		for (i = 0; i < adev->gfx.num_compute_rings; i++)
> -			adev->gfx.compute_ring[i].ready = false;
> +			adev->gfx.compute_ring[i].sched.ready = false;
>   	}
>   	udelay(50);
>   }
> @@ -2124,10 +2124,10 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev)
>   
>   	/* start the rings */
>   	gfx_v6_0_cp_gfx_start(adev);
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	r = amdgpu_ring_test_ring(ring);
>   	if (r) {
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   		return r;
>   	}
>   
> @@ -2227,14 +2227,14 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev)
>   	WREG32(mmCP_RB2_CNTL, tmp);
>   	WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8);
>   
> -	adev->gfx.compute_ring[0].ready = false;
> -	adev->gfx.compute_ring[1].ready = false;
> +	adev->gfx.compute_ring[0].sched.ready = false;
> +	adev->gfx.compute_ring[1].sched.ready = false;
>   
>   	for (i = 0; i < 2; i++) {
>   		r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]);
>   		if (r)
>   			return r;
> -		adev->gfx.compute_ring[i].ready = true;
> +		adev->gfx.compute_ring[i].sched.ready = true;
>   	}
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 9fadb32..5558075 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -2403,7 +2403,7 @@ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
>   	} else {
>   		WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK));
>   		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -			adev->gfx.gfx_ring[i].ready = false;
> +			adev->gfx.gfx_ring[i].sched.ready = false;
>   	}
>   	udelay(50);
>   }
> @@ -2613,10 +2613,10 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
>   
>   	/* start the ring */
>   	gfx_v7_0_cp_gfx_start(adev);
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	r = amdgpu_ring_test_ring(ring);
>   	if (r) {
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   		return r;
>   	}
>   
> @@ -2675,7 +2675,7 @@ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
>   	} else {
>   		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
>   		for (i = 0; i < adev->gfx.num_compute_rings; i++)
> -			adev->gfx.compute_ring[i].ready = false;
> +			adev->gfx.compute_ring[i].sched.ready = false;
>   	}
>   	udelay(50);
>   }
> @@ -3106,10 +3106,10 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
>   
>   	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
>   		ring = &adev->gfx.compute_ring[i];
> -		ring->ready = true;
> +		ring->sched.ready = true;
>   		r = amdgpu_ring_test_ring(ring);
>   		if (r)
> -			ring->ready = false;
> +			ring->sched.ready = false;
>   	}
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 4e6d31f..6869d9b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1629,7 +1629,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
>   		return 0;
>   
>   	/* bail if the compute ring is not ready */
> -	if (!ring->ready)
> +	if (!ring->sched.ready)
>   		return 0;
>   
>   	tmp = RREG32(mmGB_EDC_MODE);
> @@ -4197,7 +4197,7 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
>   		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
>   		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
>   		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -			adev->gfx.gfx_ring[i].ready = false;
> +			adev->gfx.gfx_ring[i].sched.ready = false;
>   	}
>   	WREG32(mmCP_ME_CNTL, tmp);
>   	udelay(50);
> @@ -4379,10 +4379,10 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
>   	/* start the ring */
>   	amdgpu_ring_clear_ring(ring);
>   	gfx_v8_0_cp_gfx_start(adev);
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	r = amdgpu_ring_test_ring(ring);
>   	if (r)
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   
>   	return r;
>   }
> @@ -4396,8 +4396,8 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
>   	} else {
>   		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
>   		for (i = 0; i < adev->gfx.num_compute_rings; i++)
> -			adev->gfx.compute_ring[i].ready = false;
> -		adev->gfx.kiq.ring.ready = false;
> +			adev->gfx.compute_ring[i].sched.ready = false;
> +		adev->gfx.kiq.ring.sched.ready = false;
>   	}
>   	udelay(50);
>   }
> @@ -4476,7 +4476,7 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
>   	r = amdgpu_ring_test_ring(kiq_ring);
>   	if (r) {
>   		DRM_ERROR("KCQ enable failed\n");
> -		kiq_ring->ready = false;
> +		kiq_ring->sched.ready = false;
>   	}
>   	return r;
>   }
> @@ -4781,7 +4781,7 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
>   	amdgpu_bo_kunmap(ring->mqd_obj);
>   	ring->mqd_ptr = NULL;
>   	amdgpu_bo_unreserve(ring->mqd_obj);
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	return 0;
>   }
>   
> @@ -4818,10 +4818,12 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
>   	/* Test KCQs */
>   	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
>   		ring = &adev->gfx.compute_ring[i];
> -		ring->ready = true;
> +		ring->sched.ready = true;
>   		r = amdgpu_ring_test_ring(ring);
> -		if (r)
> -			ring->ready = false;
> +		if (r) {
> +			ring->sched.ready = false;
> +			DRM_ERROR("%d", ring->idx);
> +		}
>   	}
>   
>   done:
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 0ce1e14..76839d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -2537,7 +2537,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
>   	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
>   	if (!enable) {
>   		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -			adev->gfx.gfx_ring[i].ready = false;
> +			adev->gfx.gfx_ring[i].sched.ready = false;
>   	}
>   	WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
>   	udelay(50);
> @@ -2727,7 +2727,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
>   
>   	/* start the ring */
>   	gfx_v9_0_cp_gfx_start(adev);
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   
>   	return 0;
>   }
> @@ -2742,8 +2742,8 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
>   		WREG32_SOC15(GC, 0, mmCP_MEC_CNTL,
>   			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
>   		for (i = 0; i < adev->gfx.num_compute_rings; i++)
> -			adev->gfx.compute_ring[i].ready = false;
> -		adev->gfx.kiq.ring.ready = false;
> +			adev->gfx.compute_ring[i].sched.ready = false;
> +		adev->gfx.kiq.ring.sched.ready = false;
>   	}
>   	udelay(50);
>   }
> @@ -2869,7 +2869,7 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
>   	r = amdgpu_ring_test_ring(kiq_ring);
>   	if (r) {
>   		DRM_ERROR("KCQ enable failed\n");
> -		kiq_ring->ready = false;
> +		kiq_ring->sched.ready = false;
>   	}
>   
>   	return r;
> @@ -3249,7 +3249,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
>   	amdgpu_bo_kunmap(ring->mqd_obj);
>   	ring->mqd_ptr = NULL;
>   	amdgpu_bo_unreserve(ring->mqd_obj);
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	return 0;
>   }
>   
> @@ -3316,17 +3316,17 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
>   	ring = &adev->gfx.gfx_ring[0];
>   	r = amdgpu_ring_test_ring(ring);
>   	if (r) {
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   		return r;
>   	}
>   
>   	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
>   		ring = &adev->gfx.compute_ring[i];
>   
> -		ring->ready = true;
> +		ring->sched.ready = true;
>   		r = amdgpu_ring_test_ring(ring);
>   		if (r)
> -			ring->ready = false;
> +			ring->sched.ready = false;
>   	}
>   
>   	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index f35d7a5..56fd3d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -381,7 +381,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>   		struct amdgpu_vmhub *hub = &adev->vmhub[i];
>   		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>   
> -		if (adev->gfx.kiq.ring.ready &&
> +		if (adev->gfx.kiq.ring.sched.ready &&
>   		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
>   		    !adev->in_gpu_reset) {
>   			r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index bedbd5f..b00631c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -349,8 +349,8 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
>   		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
>   		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
>   	}
> -	sdma0->ready = false;
> -	sdma1->ready = false;
> +	sdma0->sched.ready = false;
> +	sdma1->sched.ready = false;
>   }
>   
>   /**
> @@ -471,7 +471,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
>   		/* enable DMA IBs */
>   		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
>   
> -		ring->ready = true;
> +		ring->sched.ready = true;
>   	}
>   
>   	sdma_v2_4_enable(adev, true);
> @@ -479,7 +479,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
>   		ring = &adev->sdma.instance[i].ring;
>   		r = amdgpu_ring_test_ring(ring);
>   		if (r) {
> -			ring->ready = false;
> +			ring->sched.ready = false;
>   			return r;
>   		}
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 415968d..533e130 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -523,8 +523,8 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
>   		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
>   		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
>   	}
> -	sdma0->ready = false;
> -	sdma1->ready = false;
> +	sdma0->sched.ready = false;
> +	sdma1->sched.ready = false;
>   }
>   
>   /**
> @@ -739,7 +739,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
>   		/* enable DMA IBs */
>   		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
>   
> -		ring->ready = true;
> +		ring->sched.ready = true;
>   	}
>   
>   	/* unhalt the MEs */
> @@ -751,7 +751,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
>   		ring = &adev->sdma.instance[i].ring;
>   		r = amdgpu_ring_test_ring(ring);
>   		if (r) {
> -			ring->ready = false;
> +			ring->sched.ready = false;
>   			return r;
>   		}
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 6ad4fda..031ed72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -634,8 +634,8 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
>   		WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
>   	}
>   
> -	sdma0->ready = false;
> -	sdma1->ready = false;
> +	sdma0->sched.ready = false;
> +	sdma1->sched.ready = false;
>   }
>   
>   /**
> @@ -675,8 +675,8 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
>   		WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
>   	}
>   
> -	sdma0->ready = false;
> -	sdma1->ready = false;
> +	sdma0->sched.ready = false;
> +	sdma1->sched.ready = false;
>   }
>   
>   /**
> @@ -863,7 +863,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>   	/* enable DMA IBs */
>   	WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
>   
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   }
>   
>   /**
> @@ -956,7 +956,7 @@ static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
>   	/* enable DMA IBs */
>   	WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
>   
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   }
>   
>   static void
> @@ -1146,7 +1146,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
>   
>   		r = amdgpu_ring_test_ring(ring);
>   		if (r) {
> -			ring->ready = false;
> +			ring->sched.ready = false;
>   			return r;
>   		}
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> index d9b27d7..ad28567 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> @@ -122,7 +122,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
>   
>   		if (adev->mman.buffer_funcs_ring == ring)
>   			amdgpu_ttm_set_buffer_funcs_status(adev, false);
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   	}
>   }
>   
> @@ -175,11 +175,11 @@ static int si_dma_start(struct amdgpu_device *adev)
>   		WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
>   		WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
>   
> -		ring->ready = true;
> +		ring->sched.ready = true;
>   
>   		r = amdgpu_ring_test_ring(ring);
>   		if (r) {
> -			ring->ready = false;
> +			ring->sched.ready = false;
>   			return r;
>   		}
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> index 1fc17bf..12bf064 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
> @@ -162,10 +162,10 @@ static int uvd_v4_2_hw_init(void *handle)
>   	uvd_v4_2_enable_mgcg(adev, true);
>   	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
>   
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	r = amdgpu_ring_test_ring(ring);
>   	if (r) {
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   		goto done;
>   	}
>   
> @@ -218,7 +218,7 @@ static int uvd_v4_2_hw_fini(void *handle)
>   	if (RREG32(mmUVD_STATUS) != 0)
>   		uvd_v4_2_stop(adev);
>   
> -	ring->ready = false;
> +	ring->sched.ready = false;
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
> index fde6ad5..ca1b332 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
> @@ -158,10 +158,10 @@ static int uvd_v5_0_hw_init(void *handle)
>   	uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
>   	uvd_v5_0_enable_mgcg(adev, true);
>   
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	r = amdgpu_ring_test_ring(ring);
>   	if (r) {
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   		goto done;
>   	}
>   
> @@ -215,7 +215,7 @@ static int uvd_v5_0_hw_fini(void *handle)
>   	if (RREG32(mmUVD_STATUS) != 0)
>   		uvd_v5_0_stop(adev);
>   
> -	ring->ready = false;
> +	ring->sched.ready = false;
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> index 7a5b402..5725101 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
> @@ -476,10 +476,10 @@ static int uvd_v6_0_hw_init(void *handle)
>   	uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
>   	uvd_v6_0_enable_mgcg(adev, true);
>   
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	r = amdgpu_ring_test_ring(ring);
>   	if (r) {
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   		goto done;
>   	}
>   
> @@ -513,10 +513,10 @@ static int uvd_v6_0_hw_init(void *handle)
>   	if (uvd_v6_0_enc_support(adev)) {
>   		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
>   			ring = &adev->uvd.inst->ring_enc[i];
> -			ring->ready = true;
> +			ring->sched.ready = true;
>   			r = amdgpu_ring_test_ring(ring);
>   			if (r) {
> -				ring->ready = false;
> +				ring->sched.ready = false;
>   				goto done;
>   			}
>   		}
> @@ -548,7 +548,7 @@ static int uvd_v6_0_hw_fini(void *handle)
>   	if (RREG32(mmUVD_STATUS) != 0)
>   		uvd_v6_0_stop(adev);
>   
> -	ring->ready = false;
> +	ring->sched.ready = false;
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index 58b39af..5edc317 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -540,10 +540,10 @@ static int uvd_v7_0_hw_init(void *handle)
>   		ring = &adev->uvd.inst[j].ring;
>   
>   		if (!amdgpu_sriov_vf(adev)) {
> -			ring->ready = true;
> +			ring->sched.ready = true;
>   			r = amdgpu_ring_test_ring(ring);
>   			if (r) {
> -				ring->ready = false;
> +				ring->sched.ready = false;
>   				goto done;
>   			}
>   
> @@ -582,10 +582,10 @@ static int uvd_v7_0_hw_init(void *handle)
>   
>   		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
>   			ring = &adev->uvd.inst[j].ring_enc[i];
> -			ring->ready = true;
> +			ring->sched.ready = true;
>   			r = amdgpu_ring_test_ring(ring);
>   			if (r) {
> -				ring->ready = false;
> +				ring->sched.ready = false;
>   				goto done;
>   			}
>   		}
> @@ -619,7 +619,7 @@ static int uvd_v7_0_hw_fini(void *handle)
>   	for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
>   		if (adev->uvd.harvest_config & (1 << i))
>   			continue;
> -		adev->uvd.inst[i].ring.ready = false;
> +		adev->uvd.inst[i].ring.sched.ready = false;
>   	}
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> index ea28828..dcbba0b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
> @@ -464,14 +464,14 @@ static int vce_v2_0_hw_init(void *handle)
>   	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
>   	vce_v2_0_enable_mgcg(adev, true, false);
>   	for (i = 0; i < adev->vce.num_rings; i++)
> -		adev->vce.ring[i].ready = false;
> +		adev->vce.ring[i].sched.ready = false;
>   
>   	for (i = 0; i < adev->vce.num_rings; i++) {
>   		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
>   		if (r)
>   			return r;
>   		else
> -			adev->vce.ring[i].ready = true;
> +			adev->vce.ring[i].sched.ready = true;
>   	}
>   
>   	DRM_INFO("VCE initialized successfully.\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> index 6dbd397..ef17f9e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
> @@ -475,14 +475,14 @@ static int vce_v3_0_hw_init(void *handle)
>   	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
>   
>   	for (i = 0; i < adev->vce.num_rings; i++)
> -		adev->vce.ring[i].ready = false;
> +		adev->vce.ring[i].sched.ready = false;
>   
>   	for (i = 0; i < adev->vce.num_rings; i++) {
>   		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
>   		if (r)
>   			return r;
>   		else
> -			adev->vce.ring[i].ready = true;
> +			adev->vce.ring[i].sched.ready = true;
>   	}
>   
>   	DRM_INFO("VCE initialized successfully.\n");
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 1c94718..742950a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -520,14 +520,14 @@ static int vce_v4_0_hw_init(void *handle)
>   		return r;
>   
>   	for (i = 0; i < adev->vce.num_rings; i++)
> -		adev->vce.ring[i].ready = false;
> +		adev->vce.ring[i].sched.ready = false;
>   
>   	for (i = 0; i < adev->vce.num_rings; i++) {
>   		r = amdgpu_ring_test_ring(&adev->vce.ring[i]);
>   		if (r)
>   			return r;
>   		else
> -			adev->vce.ring[i].ready = true;
> +			adev->vce.ring[i].sched.ready = true;
>   	}
>   
>   	DRM_INFO("VCE initialized successfully.\n");
> @@ -549,7 +549,7 @@ static int vce_v4_0_hw_fini(void *handle)
>   	}
>   
>   	for (i = 0; i < adev->vce.num_rings; i++)
> -		adev->vce.ring[i].ready = false;
> +		adev->vce.ring[i].sched.ready = false;
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> index eae9092..f1650da 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
> @@ -176,28 +176,28 @@ static int vcn_v1_0_hw_init(void *handle)
>   	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
>   	int i, r;
>   
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	r = amdgpu_ring_test_ring(ring);
>   	if (r) {
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   		goto done;
>   	}
>   
>   	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
>   		ring = &adev->vcn.ring_enc[i];
> -		ring->ready = true;
> +		ring->sched.ready = true;
>   		r = amdgpu_ring_test_ring(ring);
>   		if (r) {
> -			ring->ready = false;
> +			ring->sched.ready = false;
>   			goto done;
>   		}
>   	}
>   
>   	ring = &adev->vcn.ring_jpeg;
> -	ring->ready = true;
> +	ring->sched.ready = true;
>   	r = amdgpu_ring_test_ring(ring);
>   	if (r) {
> -		ring->ready = false;
> +		ring->sched.ready = false;
>   		goto done;
>   	}
>   
> @@ -224,7 +224,7 @@ static int vcn_v1_0_hw_fini(void *handle)
>   	if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
>   		vcn_v1_0_stop(adev);
>   
> -	ring->ready = false;
> +	ring->sched.ready = false;
>   
>   	return 0;
>   }

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 1/2] drm/sched: Add boolean to mark if sched is ready to work v2
       [not found]     ` <1eed2f47-90da-e518-1c14-3ad5156044a3-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-23 14:23       ` Grodzovsky, Andrey
  2018-10-24  7:01         ` Koenig, Christian
  0 siblings, 1 reply; 6+ messages in thread
From: Grodzovsky, Andrey @ 2018-10-23 14:23 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Deucher, Alexander



On 10/22/2018 05:33 AM, Koenig, Christian wrote:
> Am 19.10.18 um 22:52 schrieb Andrey Grodzovsky:
>> Problem:
>> A particular scheduler may become unusable (underlying HW) after
>> some event (e.g. GPU reset). If it's later chosen by
>> the get free sched. policy a command will fail to be
>> submitted.
>>
>> Fix:
>> Add a driver specific callback to report the sched status so
>> rq with bad sched can be avoided in favor of working one or
>> none in which case job init will fail.
>>
>> v2: Switch from driver callback to flag in scheduler.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
>>    drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  2 +-
>>    drivers/gpu/drm/scheduler/sched_entity.c  |  9 ++++++++-
>>    drivers/gpu/drm/scheduler/sched_main.c    | 10 +++++++++-
>>    drivers/gpu/drm/v3d/v3d_sched.c           |  4 ++--
>>    include/drm/gpu_scheduler.h               |  5 ++++-
>>    6 files changed, 25 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> index 5448cf2..bf845b0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> @@ -450,7 +450,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>>    
>>    		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>    				   num_hw_submission, amdgpu_job_hang_limit,
>> -				   timeout, ring->name);
>> +				   timeout, ring->name, false);
>>    		if (r) {
>>    			DRM_ERROR("Failed to create scheduler on ring %s.\n",
>>    				  ring->name);
>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> index f8c5f1e..9dca347 100644
>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>> @@ -178,7 +178,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>>    
>>    	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>>    			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>> -			     msecs_to_jiffies(500), dev_name(gpu->dev));
>> +			     msecs_to_jiffies(500), dev_name(gpu->dev), true);
>>    	if (ret)
>>    		return ret;
>>    
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
>> index 3e22a54..ba54c30 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -130,7 +130,14 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
>>    	int i;
>>    
>>    	for (i = 0; i < entity->num_rq_list; ++i) {
>> -		num_jobs = atomic_read(&entity->rq_list[i]->sched->num_jobs);
>> +		struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
>> +
>> +		if (!entity->rq_list[i]->sched->ready) {
>> +			DRM_WARN("sched%s is not ready, skipping", sched->name);
>> +			continue;
>> +		}
>> +
>> +		num_jobs = atomic_read(&sched->num_jobs);
>>    		if (num_jobs < min_jobs) {
>>    			min_jobs = num_jobs;
>>    			rq = entity->rq_list[i];
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>> index 63b997d..772adec 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -420,6 +420,9 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>    	struct drm_gpu_scheduler *sched;
>>    
>>    	drm_sched_entity_select_rq(entity);
>> +	if (!entity->rq)
>> +		return -ENOENT;
>> +
>>    	sched = entity->rq->sched;
>>    
>>    	job->sched = sched;
>> @@ -598,6 +601,7 @@ static int drm_sched_main(void *param)
>>     * @hang_limit: number of times to allow a job to hang before dropping it
>>     * @timeout: timeout value in jiffies for the scheduler
>>     * @name: name used for debugging
>> + * @ready: marks if the underlying HW is ready to work
>>     *
>>     * Return 0 on success, otherwise error code.
>>     */
>> @@ -606,7 +610,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>>    		   unsigned hw_submission,
>>    		   unsigned hang_limit,
>>    		   long timeout,
>> -		   const char *name)
>> +		   const char *name,
>> +		   bool	ready)
> Please drop the ready flag here. We should consider a scheduler ready as
> soon as it is initialized.

I don't totally agree with this, because this flag marks that the HW
(the HW ring) is ready to run, and not the scheduler, which is a SW entity.
For amdgpu, drm_sched_init is called from the sw_init stage while the
ring initialization and tests take place in the hw_init stage. Maybe if
the flag were named 'hw_ready' instead of just 'ready' it would make
more sense?
Also, if there is code that currently looks at the 'ready' flag state
to take some action, and that code is executed after drm_sched_init but
before amdgpu_ring_test_helper is called, it will see the 'ready' flag
== true instead of false as it does today.

Andrey

>
> Apart from that looks good to me,
> Christian.
>
>>    {
>>    	int i;
>>    	sched->ops = ops;
>> @@ -633,6 +638,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>>    		return PTR_ERR(sched->thread);
>>    	}
>>    
>> +	sched->ready = ready;
>>    	return 0;
>>    }
>>    EXPORT_SYMBOL(drm_sched_init);
>> @@ -648,5 +654,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
>>    {
>>    	if (sched->thread)
>>    		kthread_stop(sched->thread);
>> +
>> +	sched->ready = false;
>>    }
>>    EXPORT_SYMBOL(drm_sched_fini);
>> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
>> index 80b641f..7cedb5f 100644
>> --- a/drivers/gpu/drm/v3d/v3d_sched.c
>> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
>> @@ -212,7 +212,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>>    			     &v3d_sched_ops,
>>    			     hw_jobs_limit, job_hang_limit,
>>    			     msecs_to_jiffies(hang_limit_ms),
>> -			     "v3d_bin");
>> +			     "v3d_bin", true);
>>    	if (ret) {
>>    		dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
>>    		return ret;
>> @@ -222,7 +222,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>>    			     &v3d_sched_ops,
>>    			     hw_jobs_limit, job_hang_limit,
>>    			     msecs_to_jiffies(hang_limit_ms),
>> -			     "v3d_render");
>> +			     "v3d_render", true);
>>    	if (ret) {
>>    		dev_err(v3d->dev, "Failed to create render scheduler: %d.",
>>    			ret);
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 0684dcd..037caea 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -264,6 +264,7 @@ struct drm_sched_backend_ops {
>>     * @hang_limit: once the hangs by a job crosses this limit then it is marked
>>     *              guilty and it will be considered for scheduling further.
>>     * @num_jobs: the number of jobs in queue in the scheduler
>> + * @ready: marks if the underlying HW is ready to work
>>     *
>>     * One scheduler is implemented for each hardware ring.
>>     */
>> @@ -283,12 +284,14 @@ struct drm_gpu_scheduler {
>>    	spinlock_t			job_list_lock;
>>    	int				hang_limit;
>>    	atomic_t                        num_jobs;
>> +	bool			ready;
>>    };
>>    
>>    int drm_sched_init(struct drm_gpu_scheduler *sched,
>>    		   const struct drm_sched_backend_ops *ops,
>>    		   uint32_t hw_submission, unsigned hang_limit, long timeout,
>> -		   const char *name);
>> +		   const char *name,
>> +		   bool	ready);
>>    void drm_sched_fini(struct drm_gpu_scheduler *sched);
>>    int drm_sched_job_init(struct drm_sched_job *job,
>>    		       struct drm_sched_entity *entity,
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 1/2] drm/sched: Add boolean to mark if sched is ready to work v2
  2018-10-23 14:23       ` Grodzovsky, Andrey
@ 2018-10-24  7:01         ` Koenig, Christian
  0 siblings, 0 replies; 6+ messages in thread
From: Koenig, Christian @ 2018-10-24  7:01 UTC (permalink / raw)
  To: Grodzovsky, Andrey, amd-gfx, dri-devel; +Cc: Deucher, Alexander

Am 23.10.18 um 16:23 schrieb Grodzovsky, Andrey:
>
> On 10/22/2018 05:33 AM, Koenig, Christian wrote:
>> Am 19.10.18 um 22:52 schrieb Andrey Grodzovsky:
>>> Problem:
>>> A particular scheduler may become unusable (underlying HW) after
>>> some event (e.g. GPU reset). If it's later chosen by
>>> the get free sched. policy a command will fail to be
>>> submitted.
>>>
>>> Fix:
>>> Add a driver specific callback to report the sched status so
>>> rq with bad sched can be avoided in favor of working one or
>>> none in which case job init will fail.
>>>
>>> v2: Switch from driver callback to flag in scheduler.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
>>>     drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  2 +-
>>>     drivers/gpu/drm/scheduler/sched_entity.c  |  9 ++++++++-
>>>     drivers/gpu/drm/scheduler/sched_main.c    | 10 +++++++++-
>>>     drivers/gpu/drm/v3d/v3d_sched.c           |  4 ++--
>>>     include/drm/gpu_scheduler.h               |  5 ++++-
>>>     6 files changed, 25 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>>> index 5448cf2..bf845b0 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>>> @@ -450,7 +450,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>>>     
>>>     		r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>>>     				   num_hw_submission, amdgpu_job_hang_limit,
>>> -				   timeout, ring->name);
>>> +				   timeout, ring->name, false);
>>>     		if (r) {
>>>     			DRM_ERROR("Failed to create scheduler on ring %s.\n",
>>>     				  ring->name);
>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> index f8c5f1e..9dca347 100644
>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> @@ -178,7 +178,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>>>     
>>>     	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>>>     			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
>>> -			     msecs_to_jiffies(500), dev_name(gpu->dev));
>>> +			     msecs_to_jiffies(500), dev_name(gpu->dev), true);
>>>     	if (ret)
>>>     		return ret;
>>>     
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index 3e22a54..ba54c30 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -130,7 +130,14 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
>>>     	int i;
>>>     
>>>     	for (i = 0; i < entity->num_rq_list; ++i) {
>>> -		num_jobs = atomic_read(&entity->rq_list[i]->sched->num_jobs);
>>> +		struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
>>> +
>>> +		if (!entity->rq_list[i]->sched->ready) {
>>> +			DRM_WARN("sched%s is not ready, skipping", sched->name);
>>> +			continue;
>>> +		}
>>> +
>>> +		num_jobs = atomic_read(&sched->num_jobs);
>>>     		if (num_jobs < min_jobs) {
>>>     			min_jobs = num_jobs;
>>>     			rq = entity->rq_list[i];
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 63b997d..772adec 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -420,6 +420,9 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>>     	struct drm_gpu_scheduler *sched;
>>>     
>>>     	drm_sched_entity_select_rq(entity);
>>> +	if (!entity->rq)
>>> +		return -ENOENT;
>>> +
>>>     	sched = entity->rq->sched;
>>>     
>>>     	job->sched = sched;
>>> @@ -598,6 +601,7 @@ static int drm_sched_main(void *param)
>>>      * @hang_limit: number of times to allow a job to hang before dropping it
>>>      * @timeout: timeout value in jiffies for the scheduler
>>>      * @name: name used for debugging
>>> + * @ready: marks if the underlying HW is ready to work
>>>      *
>>>      * Return 0 on success, otherwise error code.
>>>      */
>>> @@ -606,7 +610,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>>>     		   unsigned hw_submission,
>>>     		   unsigned hang_limit,
>>>     		   long timeout,
>>> -		   const char *name)
>>> +		   const char *name,
>>> +		   bool	ready)
>> Please drop the ready flag here. We should consider a scheduler ready as
>> soon as it is initialized.
> Not totally agree with this because this flag marks that HW ready to run
> (the HW ring) and not the scheduler which is SW entity,

And exactly that's incorrect. This ready flag marks if the SW scheduler 
is ready to accept job submissions. If the underlying hardware is ready 
or not is completely irrelevant for this state.

For example we should NOT set it to false during GPU reset. Only when a 
reset failed and a HW engine runs into an unrecoverable failure then we 
can set the flag to false.

Saying this setting it to false in all the IP specific *_stop() and 
*_fini() functions is probably incorrect as well.

Christian.

> For amdgpu - drm_sched_init is called from the sw_init stage while the
> ring initialization and tests takes place in hw_init stage. Maybe if the
> flag is
> named 'hw_ready' instead of just 'ready' it would make more sense ?
> Also in case there is some code which today looks at 'ready' flag state
> to take some action and the code is executed after drm_sched_init but
> before
> amdgpu_ring_test_helper is called it will see the 'ready' flag == true
> instead of false as it's today.
>
> Andrey
>
>> Apart from that looks good to me,
>> Christian.
>>
>>>     {
>>>     	int i;
>>>     	sched->ops = ops;
>>> @@ -633,6 +638,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>>>     		return PTR_ERR(sched->thread);
>>>     	}
>>>     
>>> +	sched->ready = ready;
>>>     	return 0;
>>>     }
>>>     EXPORT_SYMBOL(drm_sched_init);
>>> @@ -648,5 +654,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
>>>     {
>>>     	if (sched->thread)
>>>     		kthread_stop(sched->thread);
>>> +
>>> +	sched->ready = false;
>>>     }
>>>     EXPORT_SYMBOL(drm_sched_fini);
>>> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
>>> index 80b641f..7cedb5f 100644
>>> --- a/drivers/gpu/drm/v3d/v3d_sched.c
>>> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
>>> @@ -212,7 +212,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>>>     			     &v3d_sched_ops,
>>>     			     hw_jobs_limit, job_hang_limit,
>>>     			     msecs_to_jiffies(hang_limit_ms),
>>> -			     "v3d_bin");
>>> +			     "v3d_bin", true);
>>>     	if (ret) {
>>>     		dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
>>>     		return ret;
>>> @@ -222,7 +222,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>>>     			     &v3d_sched_ops,
>>>     			     hw_jobs_limit, job_hang_limit,
>>>     			     msecs_to_jiffies(hang_limit_ms),
>>> -			     "v3d_render");
>>> +			     "v3d_render", true);
>>>     	if (ret) {
>>>     		dev_err(v3d->dev, "Failed to create render scheduler: %d.",
>>>     			ret);
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index 0684dcd..037caea 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -264,6 +264,7 @@ struct drm_sched_backend_ops {
>>>      * @hang_limit: once the hangs by a job crosses this limit then it is marked
>>>      *              guilty and it will be considered for scheduling further.
>>>      * @num_jobs: the number of jobs in queue in the scheduler
>>> + * @ready: marks if the underlying HW is ready to work
>>>      *
>>>      * One scheduler is implemented for each hardware ring.
>>>      */
>>> @@ -283,12 +284,14 @@ struct drm_gpu_scheduler {
>>>     	spinlock_t			job_list_lock;
>>>     	int				hang_limit;
>>>     	atomic_t                        num_jobs;
>>> +	bool			ready;
>>>     };
>>>     
>>>     int drm_sched_init(struct drm_gpu_scheduler *sched,
>>>     		   const struct drm_sched_backend_ops *ops,
>>>     		   uint32_t hw_submission, unsigned hang_limit, long timeout,
>>> -		   const char *name);
>>> +		   const char *name,
>>> +		   bool	ready);
>>>     void drm_sched_fini(struct drm_gpu_scheduler *sched);
>>>     int drm_sched_job_init(struct drm_sched_job *job,
>>>     		       struct drm_sched_entity *entity,
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-10-24  7:01 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-19 20:52 [PATCH v2 1/2] drm/sched: Add boolean to mark if sched is ready to work v2 Andrey Grodzovsky
2018-10-19 20:52 ` [PATCH v2 2/2] drm/amdgpu: Retire amdgpu_ring.ready flag Andrey Grodzovsky
     [not found]   ` <1539982364-6533-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2018-10-22  9:40     ` Koenig, Christian
     [not found] ` <1539982364-6533-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2018-10-22  9:33   ` [PATCH v2 1/2] drm/sched: Add boolean to mark if sched is ready to work v2 Koenig, Christian
     [not found]     ` <1eed2f47-90da-e518-1c14-3ad5156044a3-5C7GfCeVMHo@public.gmane.org>
2018-10-23 14:23       ` Grodzovsky, Andrey
2018-10-24  7:01         ` Koenig, Christian

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.