* [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init
@ 2020-02-27 21:40 Nirmoy Das
  2020-02-27 21:40 ` [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list Nirmoy Das
                   ` (4 more replies)
  0 siblings, 5 replies; 15+ messages in thread
From: Nirmoy Das @ 2020-02-27 21:40 UTC (permalink / raw)
  To: amd-gfx; +Cc: alexander.deucher, Ray.Huang, nirmoy.das, christian.koenig

We were changing the compute ring priority while the rings were in use,
before every job submission, which is not recommended. This patch
sets the compute queue priority at mqd initialization for gfx8 and gfx9.

Policy: enable high priority compute queues only if the GPU has more than
one MEC; if so, PIPE0 and PIPE1 will be in high priority.

High/normal priority compute sched lists are generated from the sets of
high/normal priority compute queues. At context creation, the entity of a
compute queue gets a high or normal priority sched list depending on
ctx->priority.
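
For illustration only (not part of the diff below): with this policy a
userspace client that asks for a high priority context ends up with its
compute entities on the high priority sched list. A minimal sketch,
assuming libdrm's amdgpu_cs_ctx_create2() (amdgpu.h / amdgpu_drm.h) and a
device handle from amdgpu_device_initialize():

    /* userspace sketch: request a high priority context; creating one
     * usually needs CAP_SYS_NICE or DRM master. With this series its
     * compute entities then run on the high priority queues. */
    amdgpu_context_handle ctx;
    int r = amdgpu_cs_ctx_create2(dev, AMDGPU_CTX_PRIORITY_HIGH, &ctx);
    if (r)
            fprintf(stderr, "high priority ctx creation failed: %d\n", r);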

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 ----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 19 +++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 14 ++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  | 12 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  6 ------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    | 23 ++++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 20 ++++++++++++++++++++
 8 files changed, 82 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f397ff97b4e4..8304d0c87899 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1205,7 +1205,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct drm_sched_entity *entity = p->entity;
 	enum drm_sched_priority priority;
-	struct amdgpu_ring *ring;
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
@@ -1258,9 +1257,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
-	ring = to_amdgpu_ring(entity->rq->sched);
-	amdgpu_ring_priority_get(ring, priority);
-
 	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
 
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 94a6c42f29ea..a1742b1d4f9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -85,8 +85,14 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const
 			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_COMPUTE:
-			scheds = adev->gfx.compute_sched;
-			num_scheds = adev->gfx.num_compute_sched;
+			if (priority > DRM_SCHED_PRIORITY_NORMAL &&
+			    adev->gfx.num_compute_sched_high) {
+				scheds = adev->gfx.compute_sched_high;
+				num_scheds = adev->gfx.num_compute_sched_high;
+			} else {
+				scheds = adev->gfx.compute_sched;
+				num_scheds = adev->gfx.num_compute_sched;
+			}
 			break;
 		case AMDGPU_HW_IP_DMA:
 			scheds = adev->sdma.sdma_sched;
@@ -638,8 +644,13 @@ void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
 	}
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
-		adev->gfx.num_compute_sched++;
+		if (!adev->gfx.compute_ring[i].high_priority) {
+			adev->gfx.compute_sched[adev->gfx.num_compute_sched++] =
+				&adev->gfx.compute_ring[i].sched;
+		} else {
+			adev->gfx.compute_sched_high[adev->gfx.num_compute_sched_high++] =
+				&adev->gfx.compute_ring[i].sched;
+		}
 	}
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 7403588684b3..bdea5d44edf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -192,6 +192,20 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
 	return adev->gfx.mec.num_mec > 1;
 }
 
+bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
+					       int queue)
+{
+	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+
+	/* only enable high priority queue if more than 1 MEC.
+	 * When multipipe_policy is true amdgpu gets queue 0,1 from each pipe of
+	 * 1st MEC. Policy: make queue 0 of each pipe as high priority compute queue */
+	if (multipipe_policy && queue == 0)
+		return true;
+
+	return false;
+}
+
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
 	int i, queue, pipe, mec;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 37ba05b63b2a..13ee0ea66c6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -41,6 +41,14 @@
 #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
 #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
 
+#define AMDGPU_GFX_PIPE_PRIO_LOW    0
+#define AMDGPU_GFX_PIPE_PRIO_NORMAL 1
+#define AMDGPU_GFX_PIPE_PRIO_HIGH   2
+
+#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM  0
+#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM  15
+
+
 struct amdgpu_mec {
 	struct amdgpu_bo	*hpd_eop_obj;
 	u64			hpd_eop_gpu_addr;
@@ -281,7 +289,9 @@ struct amdgpu_gfx {
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
 	struct drm_gpu_scheduler	*compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
+	struct drm_gpu_scheduler	*compute_sched_high[AMDGPU_MAX_COMPUTE_RINGS];
 	uint32_t			num_compute_sched;
+	uint32_t			num_compute_sched_high;
 	unsigned			num_compute_rings;
 	struct amdgpu_irq_src		eop_irq;
 	struct amdgpu_irq_src		priv_reg_irq;
@@ -363,6 +373,8 @@ void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
 				 int *mec, int *pipe, int *queue);
 bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
 				     int pipe, int queue);
+bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
+					       int queue);
 int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
 			       int pipe, int queue);
 void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d42be880a236..4981e443a884 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -117,12 +117,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
 
 static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 {
-	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
 
 	drm_sched_job_cleanup(s_job);
 
-	amdgpu_ring_priority_put(ring, s_job->s_priority);
 	dma_fence_put(job->fence);
 	amdgpu_sync_free(&job->sync);
 	amdgpu_sync_free(&job->sched_sync);
@@ -143,7 +141,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 		      void *owner, struct dma_fence **f)
 {
 	enum drm_sched_priority priority;
-	struct amdgpu_ring *ring;
 	int r;
 
 	if (!f)
@@ -158,9 +155,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 	priority = job->base.s_priority;
 	drm_sched_entity_push_job(&job->base, entity);
 
-	ring = to_amdgpu_ring(entity->rq->sched);
-	amdgpu_ring_priority_get(ring, priority);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 24caff085d00..34fcd467f18d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -222,6 +222,7 @@ struct amdgpu_ring {
 	struct mutex		priority_mutex;
 	/* protected by priority_mutex */
 	int			priority;
+	bool			high_priority;
 
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e63f98b2d389..4260becd569b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4426,6 +4426,23 @@ static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
 	return r;
 }
 
+static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
+			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+			ring->high_priority = true;
+			mqd->cp_hqd_queue_priority =
+				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+			mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+		}
+		else
+			ring->high_priority = false;
+	}
+}
+
 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -4549,9 +4566,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	/* defaults */
 	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
 	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
-	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
-	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
-	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
 	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
 	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
 	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
@@ -4563,6 +4577,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
 
+	/* set static priority for a queue/ring */
+	gfx_v8_0_mqd_set_priority(ring, mqd);
+
 	/* map_queues packet doesn't need activate the queue,
 	 * so only kiq need set this field.
 	 */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 4135e4126e82..85a54849abd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3310,6 +3310,23 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
+static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+		if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
+			mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
+			ring->high_priority = true;
+			mqd->cp_hqd_queue_priority =
+				AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
+			mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+		}
+		else
+			ring->high_priority = false;
+	}
+}
+
 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -3446,6 +3463,9 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
 	mqd->cp_hqd_ib_control = tmp;
 
+	/* set static priority for a queue/ring */
+	gfx_v9_0_mqd_set_priority(ring, mqd);
+
 	/* map_queues packet doesn't need activate the queue,
 	 * so only kiq need set this field.
 	 */
-- 
2.25.0


* [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list
  2020-02-27 21:40 [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init Nirmoy Das
@ 2020-02-27 21:40 ` Nirmoy Das
  2020-02-28  5:08   ` Luben Tuikov
  2020-02-27 21:40 ` [RFC PATCH 3/4] drm/amdgpu: change hw sched list on ctx priority override Nirmoy Das
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Nirmoy Das @ 2020-02-27 21:40 UTC (permalink / raw)
  To: amd-gfx; +Cc: alexander.deucher, Ray.Huang, nirmoy.das, christian.koenig

Implement drm_sched_entity_modify_sched(), which can replace an entity's
existing sched_list with a different one. This is helpful when userspace
changes the priority of a ctx/entity: the driver can then switch to the
corresponding HW sched list for that priority.
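
For example (mirroring what patch 3 in this series does for compute
entities, using the amdgpu fields introduced in patch 1), a driver
switching an entity to its high priority sched list would do roughly:

    struct drm_gpu_scheduler **scheds = adev->gfx.compute_sched_high;
    unsigned int num_scheds = adev->gfx.num_compute_sched_high;
    int r;

    /* replace the entity's sched_list; new jobs will then be picked up
     * by one of the high priority schedulers */
    r = drm_sched_entity_modify_sched(&aentity->entity, scheds, num_scheds);
    if (r)
            DRM_ERROR("Failed to modify sched list\n");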

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 24 ++++++++++++++++++++++++
 include/drm/gpu_scheduler.h              |  4 ++++
 2 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 63bccd201b97..711e9d504bcb 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -83,6 +83,30 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 }
 EXPORT_SYMBOL(drm_sched_entity_init);
 
+/**
+ * drm_sched_entity_modify_sched - Modify sched of an entity
+ *
+ * @entity: scheduler entity to modify
+ * @sched_list: the list of new drm scheds which will replace
+ *		existing entity->sched_list
+ * @num_sched_list: number of drm sched in sched_list
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
+				  struct drm_gpu_scheduler **sched_list,
+				  unsigned int num_sched_list)
+{
+	if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
+		return -EINVAL;
+
+	entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
+	entity->num_sched_list = num_sched_list;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_sched_entity_modify_sched);
+
 /**
  * drm_sched_entity_is_idle - Check if entity is idle
  *
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 589be851f8a1..0c164a96d51b 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -297,6 +297,10 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched);
 int drm_sched_job_init(struct drm_sched_job *job,
 		       struct drm_sched_entity *entity,
 		       void *owner);
+int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
+				  struct drm_gpu_scheduler **sched_list,
+                                  unsigned int num_sched_list);
+
 void drm_sched_job_cleanup(struct drm_sched_job *job);
 void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
-- 
2.25.0


* [RFC PATCH 3/4] drm/amdgpu: change hw sched list on ctx priority override
  2020-02-27 21:40 [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init Nirmoy Das
  2020-02-27 21:40 ` [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list Nirmoy Das
@ 2020-02-27 21:40 ` Nirmoy Das
  2020-02-28  4:17   ` Luben Tuikov
  2020-02-27 21:40 ` [RFC PATCH 4/4] drm/amdgpu: remove unused functions Nirmoy Das
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 15+ messages in thread
From: Nirmoy Das @ 2020-02-27 21:40 UTC (permalink / raw)
  To: amd-gfx; +Cc: alexander.deucher, Ray.Huang, nirmoy.das, christian.koenig

Switch to the appropriate sched list for an entity on priority override.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 54 ++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a1742b1d4f9c..69a791430b25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -508,11 +508,53 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 	return fence;
 }
 
+static int amdgpu_ctx_change_sched(struct amdgpu_ctx *ctx,
+				   struct amdgpu_ctx_entity *aentity,
+				   int hw_ip, enum drm_sched_priority priority)
+{
+	struct amdgpu_device *adev = ctx->adev;
+	struct drm_gpu_scheduler **scheds = NULL;
+	unsigned num_scheds = 0;
+
+	switch (hw_ip) {
+		case AMDGPU_HW_IP_COMPUTE:
+			if (priority > DRM_SCHED_PRIORITY_NORMAL &&
+			    adev->gfx.num_compute_sched_high) {
+				scheds = adev->gfx.compute_sched_high;
+				num_scheds = adev->gfx.num_compute_sched_high;
+			} else {
+				scheds = adev->gfx.compute_sched;
+				num_scheds = adev->gfx.num_compute_sched;
+			}
+			break;
+		default:
+			return 0;
+	}
+
+	return drm_sched_entity_modify_sched(&aentity->entity, scheds, num_scheds);
+}
+
+static int amdgpu_ctx_hw_priority_override(struct amdgpu_ctx *ctx,
+					    const u32 hw_ip,
+					    enum drm_sched_priority priority)
+{
+	int r = 0, i;
+
+	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
+		if (!ctx->entities[hw_ip][i])
+			continue;
+		r = amdgpu_ctx_change_sched(ctx, ctx->entities[hw_ip][i],
+					    hw_ip, priority);
+	}
+
+	return r;
+}
+
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
 	enum drm_sched_priority ctx_prio;
-	unsigned i, j;
+	unsigned r, i, j;
 
 	ctx->override_priority = priority;
 
@@ -521,11 +563,21 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
 			struct drm_sched_entity *entity;
+			struct amdgpu_ring *ring;
 
 			if (!ctx->entities[i][j])
 				continue;
 
 			entity = &ctx->entities[i][j]->entity;
+			ring = to_amdgpu_ring(entity->rq->sched);
+
+			if (ring->high_priority) {
+				r = amdgpu_ctx_hw_priority_override(ctx, i,
+								    ctx_prio);
+				if (r)
+					DRM_ERROR("Failed to override HW priority for %s",
+						  ring->name);
+			}
 			drm_sched_entity_set_priority(entity, ctx_prio);
 		}
 	}
-- 
2.25.0


* [RFC PATCH 4/4] drm/amdgpu: remove unused functions
  2020-02-27 21:40 [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init Nirmoy Das
  2020-02-27 21:40 ` [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list Nirmoy Das
  2020-02-27 21:40 ` [RFC PATCH 3/4] drm/amdgpu: change hw sched list on ctx priority override Nirmoy Das
@ 2020-02-27 21:40 ` Nirmoy Das
  2020-02-27 21:42 ` [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init Nirmoy
  2020-02-27 21:48 ` Alex Deucher
  4 siblings, 0 replies; 15+ messages in thread
From: Nirmoy Das @ 2020-02-27 21:40 UTC (permalink / raw)
  To: amd-gfx; +Cc: alexander.deucher, Ray.Huang, nirmoy.das, christian.koenig

amdgpu now sets the priority of compute queues statically at
initialization, so remove all the functions responsible for changing
compute queue priority dynamically.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  70 ----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   7 --
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    |  99 ----------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 100 -----------------------
 4 files changed, 276 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 18e11b0fdc3e..4d603ee72b8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -150,76 +150,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
 		ring->funcs->end_use(ring);
 }
 
-/**
- * amdgpu_ring_priority_put - restore a ring's priority
- *
- * @ring: amdgpu_ring structure holding the information
- * @priority: target priority
- *
- * Release a request for executing at @priority
- */
-void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
-			      enum drm_sched_priority priority)
-{
-	int i;
-
-	if (!ring->funcs->set_priority)
-		return;
-
-	if (atomic_dec_return(&ring->num_jobs[priority]) > 0)
-		return;
-
-	/* no need to restore if the job is already at the lowest priority */
-	if (priority == DRM_SCHED_PRIORITY_NORMAL)
-		return;
-
-	mutex_lock(&ring->priority_mutex);
-	/* something higher prio is executing, no need to decay */
-	if (ring->priority > priority)
-		goto out_unlock;
-
-	/* decay priority to the next level with a job available */
-	for (i = priority; i >= DRM_SCHED_PRIORITY_MIN; i--) {
-		if (i == DRM_SCHED_PRIORITY_NORMAL
-				|| atomic_read(&ring->num_jobs[i])) {
-			ring->priority = i;
-			ring->funcs->set_priority(ring, i);
-			break;
-		}
-	}
-
-out_unlock:
-	mutex_unlock(&ring->priority_mutex);
-}
-
-/**
- * amdgpu_ring_priority_get - change the ring's priority
- *
- * @ring: amdgpu_ring structure holding the information
- * @priority: target priority
- *
- * Request a ring's priority to be raised to @priority (refcounted).
- */
-void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
-			      enum drm_sched_priority priority)
-{
-	if (!ring->funcs->set_priority)
-		return;
-
-	if (atomic_inc_return(&ring->num_jobs[priority]) <= 0)
-		return;
-
-	mutex_lock(&ring->priority_mutex);
-	if (priority <= ring->priority)
-		goto out_unlock;
-
-	ring->priority = priority;
-	ring->funcs->set_priority(ring, priority);
-
-out_unlock:
-	mutex_unlock(&ring->priority_mutex);
-}
-
 /**
  * amdgpu_ring_init - init driver ring struct.
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 34fcd467f18d..87ec35b68bfd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -167,9 +167,6 @@ struct amdgpu_ring_funcs {
 					uint32_t reg0, uint32_t reg1,
 					uint32_t ref, uint32_t mask);
 	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
-	/* priority functions */
-	void (*set_priority) (struct amdgpu_ring *ring,
-			      enum drm_sched_priority priority);
 	/* Try to soft recover the ring to make the fence signal */
 	void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
 	int (*preempt_ib)(struct amdgpu_ring *ring);
@@ -259,10 +256,6 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
-void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
-			      enum drm_sched_priority priority);
-void amdgpu_ring_priority_put(struct amdgpu_ring *ring,
-			      enum drm_sched_priority priority);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 4260becd569b..3cd9e77c66eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6258,104 +6258,6 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 }
 
-static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
-					   bool acquire)
-{
-	struct amdgpu_device *adev = ring->adev;
-	int pipe_num, tmp, reg;
-	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
-
-	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
-
-	/* first me only has 2 entries, GFX and HP3D */
-	if (ring->me > 0)
-		pipe_num -= 2;
-
-	reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num;
-	tmp = RREG32(reg);
-	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
-	WREG32(reg, tmp);
-}
-
-static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev,
-					    struct amdgpu_ring *ring,
-					    bool acquire)
-{
-	int i, pipe;
-	bool reserve;
-	struct amdgpu_ring *iring;
-
-	mutex_lock(&adev->gfx.pipe_reserve_mutex);
-	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
-	if (acquire)
-		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-	else
-		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-
-	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
-		/* Clear all reservations - everyone reacquires all resources */
-		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
-			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
-						       true);
-
-		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
-			gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
-						       true);
-	} else {
-		/* Lower all pipes without a current reservation */
-		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
-			iring = &adev->gfx.gfx_ring[i];
-			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
-							   iring->me,
-							   iring->pipe,
-							   0);
-			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
-		}
-
-		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
-			iring = &adev->gfx.compute_ring[i];
-			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
-							   iring->me,
-							   iring->pipe,
-							   0);
-			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-			gfx_v8_0_ring_set_pipe_percent(iring, reserve);
-		}
-	}
-
-	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
-}
-
-static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev,
-				      struct amdgpu_ring *ring,
-				      bool acquire)
-{
-	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
-	uint32_t queue_priority = acquire ? 0xf : 0x0;
-
-	mutex_lock(&adev->srbm_mutex);
-	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-
-	WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority);
-
-	vi_srbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-}
-static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
-					       enum drm_sched_priority priority)
-{
-	struct amdgpu_device *adev = ring->adev;
-	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
-
-	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
-		return;
-
-	gfx_v8_0_hqd_set_priority(adev, ring, acquire);
-	gfx_v8_0_pipe_reserve_resources(adev, ring, acquire);
-}
-
 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
 					     u64 addr, u64 seq,
 					     unsigned flags)
@@ -6988,7 +6890,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.set_priority = gfx_v8_0_ring_set_priority_compute,
 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 85a54849abd1..3522e170adad 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -5112,105 +5112,6 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
 	return wptr;
 }
 
-static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
-					   bool acquire)
-{
-	struct amdgpu_device *adev = ring->adev;
-	int pipe_num, tmp, reg;
-	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
-
-	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
-
-	/* first me only has 2 entries, GFX and HP3D */
-	if (ring->me > 0)
-		pipe_num -= 2;
-
-	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
-	tmp = RREG32(reg);
-	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
-	WREG32(reg, tmp);
-}
-
-static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
-					    struct amdgpu_ring *ring,
-					    bool acquire)
-{
-	int i, pipe;
-	bool reserve;
-	struct amdgpu_ring *iring;
-
-	mutex_lock(&adev->gfx.pipe_reserve_mutex);
-	pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
-	if (acquire)
-		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-	else
-		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-
-	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
-		/* Clear all reservations - everyone reacquires all resources */
-		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
-			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
-						       true);
-
-		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
-			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
-						       true);
-	} else {
-		/* Lower all pipes without a current reservation */
-		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
-			iring = &adev->gfx.gfx_ring[i];
-			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
-							   iring->me,
-							   iring->pipe,
-							   0);
-			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
-		}
-
-		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
-			iring = &adev->gfx.compute_ring[i];
-			pipe = amdgpu_gfx_mec_queue_to_bit(adev,
-							   iring->me,
-							   iring->pipe,
-							   0);
-			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
-			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
-		}
-	}
-
-	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
-}
-
-static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
-				      struct amdgpu_ring *ring,
-				      bool acquire)
-{
-	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
-	uint32_t queue_priority = acquire ? 0xf : 0x0;
-
-	mutex_lock(&adev->srbm_mutex);
-	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-
-	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
-	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
-
-	soc15_grbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-}
-
-static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
-					       enum drm_sched_priority priority)
-{
-	struct amdgpu_device *adev = ring->adev;
-	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
-
-	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
-		return;
-
-	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
-	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
-}
-
 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -6581,7 +6482,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
 	.test_ib = gfx_v9_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
-	.set_priority = gfx_v9_0_ring_set_priority_compute,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
-- 
2.25.0


* Re: [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init
  2020-02-27 21:40 [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init Nirmoy Das
                   ` (2 preceding siblings ...)
  2020-02-27 21:40 ` [RFC PATCH 4/4] drm/amdgpu: remove unused functions Nirmoy Das
@ 2020-02-27 21:42 ` Nirmoy
  2020-02-27 21:48 ` Alex Deucher
  4 siblings, 0 replies; 15+ messages in thread
From: Nirmoy @ 2020-02-27 21:42 UTC (permalink / raw)
  To: Nirmoy Das, amd-gfx
  Cc: alexander.deucher, Ray.Huang, nirmoy.das, christian.koenig


On 2/27/20 10:40 PM, Nirmoy Das wrote:
> We were changing compute ring priority while rings were being used
> before every job submission which is not recommended. This patch
> sets compute queue priority at mqd initialization for gfx8 and gfx9.
>
> Policy: Enable high priority compute queues only if gpu has >1 MEC, if
> so PIPE0 and PIPE1 will be in high priority.
s/PIPE0 and PIPE1 will be in high priority/1st queue of each pipe will be
set to high priority/

* Re: [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init
  2020-02-27 21:40 [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init Nirmoy Das
                   ` (3 preceding siblings ...)
  2020-02-27 21:42 ` [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init Nirmoy
@ 2020-02-27 21:48 ` Alex Deucher
  2020-02-27 21:50   ` Alex Deucher
  4 siblings, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2020-02-27 21:48 UTC (permalink / raw)
  To: Nirmoy Das
  Cc: Deucher, Alexander, Huang Rui, Nirmoy Das, Christian Koenig,
	amd-gfx list

On Thu, Feb 27, 2020 at 4:37 PM Nirmoy Das <nirmoy.aiemd@gmail.com> wrote:
>
> We were changing compute ring priority while rings were being used
> before every job submission which is not recommended. This patch
> sets compute queue priority at mqd initialization for gfx8 and gfx9.
>
> Policy: Enable high priority compute queues only if gpu has >1 MEC, if
> so PIPE0 and PIPE1 will be in high priority.

I think we still want high priority queues on even when there is only
one MEC.  It might work better on multi-MEC chips, but it should still
work on single MEC chips.
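
Something along these lines (untested sketch) would keep the policy on
single MEC parts as well:

    bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
                                                   int queue)
    {
            /* sketch: make queue 0 of each pipe high priority regardless
             * of how many MECs the chip has */
            return queue == 0;
    }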

>
> high/normal priority compute sched list are generated from set of high/normal
> priority compute queues. At context creation, entity of compute queue
> get a sched list from high or normal priority depending on ctx->priority
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 ----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 19 +++++++++++++++----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 14 ++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  | 12 ++++++++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  6 ------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    | 23 ++++++++++++++++++++---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 20 ++++++++++++++++++++
>  8 files changed, 82 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index f397ff97b4e4..8304d0c87899 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1205,7 +1205,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>         struct drm_sched_entity *entity = p->entity;
>         enum drm_sched_priority priority;
> -       struct amdgpu_ring *ring;
>         struct amdgpu_bo_list_entry *e;
>         struct amdgpu_job *job;
>         uint64_t seq;
> @@ -1258,9 +1257,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>         priority = job->base.s_priority;
>         drm_sched_entity_push_job(&job->base, entity);
>
> -       ring = to_amdgpu_ring(entity->rq->sched);
> -       amdgpu_ring_priority_get(ring, priority);
> -
>         amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>
>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index 94a6c42f29ea..a1742b1d4f9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -85,8 +85,14 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const
>                         num_scheds = 1;
>                         break;
>                 case AMDGPU_HW_IP_COMPUTE:
> -                       scheds = adev->gfx.compute_sched;
> -                       num_scheds = adev->gfx.num_compute_sched;
> +                       if (priority > DRM_SCHED_PRIORITY_NORMAL &&
> +                           adev->gfx.num_compute_sched_high) {
> +                               scheds = adev->gfx.compute_sched_high;
> +                               num_scheds = adev->gfx.num_compute_sched_high;
> +                       } else {
> +                               scheds = adev->gfx.compute_sched;
> +                               num_scheds = adev->gfx.num_compute_sched;
> +                       }
>                         break;
>                 case AMDGPU_HW_IP_DMA:
>                         scheds = adev->sdma.sdma_sched;
> @@ -638,8 +644,13 @@ void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
>         }
>
>         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> -               adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
> -               adev->gfx.num_compute_sched++;
> +               if (!adev->gfx.compute_ring[i].high_priority) {
> +                       adev->gfx.compute_sched[adev->gfx.num_compute_sched++] =
> +                               &adev->gfx.compute_ring[i].sched;
> +               } else {
> +                       adev->gfx.compute_sched_high[adev->gfx.num_compute_sched_high++] =
> +                               &adev->gfx.compute_ring[i].sched;
> +               }
>         }
>
>         for (i = 0; i < adev->sdma.num_instances; i++) {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 7403588684b3..bdea5d44edf4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -192,6 +192,20 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
>         return adev->gfx.mec.num_mec > 1;
>  }
>
> +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
> +                                              int queue)
> +{
> +       bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
> +
> +       /* only enable high priority queue if more than 1 MEC.
> +        * When multipipe_policy is true amdgpu gets queue 0,1 from each pipe of
> +        * 1st MEC. Policy: make queue 0 of each pipe as high priority compute queue */
> +       if (multipipe_policy && queue == 0)
> +               return true;
> +
> +       return false;
> +}
> +
>  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>  {
>         int i, queue, pipe, mec;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 37ba05b63b2a..13ee0ea66c6f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -41,6 +41,14 @@
>  #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
>  #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
>
> +#define AMDGPU_GFX_PIPE_PRIO_LOW    0
> +#define AMDGPU_GFX_PIPE_PRIO_NORMAL 1
> +#define AMDGPU_GFX_PIPE_PRIO_HIGH   2
> +
> +#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM  0
> +#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM  15
> +
> +
>  struct amdgpu_mec {
>         struct amdgpu_bo        *hpd_eop_obj;
>         u64                     hpd_eop_gpu_addr;
> @@ -281,7 +289,9 @@ struct amdgpu_gfx {
>         unsigned                        num_gfx_rings;
>         struct amdgpu_ring              compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
>         struct drm_gpu_scheduler        *compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
> +       struct drm_gpu_scheduler        *compute_sched_high[AMDGPU_MAX_COMPUTE_RINGS];
>         uint32_t                        num_compute_sched;
> +       uint32_t                        num_compute_sched_high;
>         unsigned                        num_compute_rings;
>         struct amdgpu_irq_src           eop_irq;
>         struct amdgpu_irq_src           priv_reg_irq;
> @@ -363,6 +373,8 @@ void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
>                                  int *mec, int *pipe, int *queue);
>  bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
>                                      int pipe, int queue);
> +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
> +                                              int queue);
>  int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
>                                int pipe, int queue);
>  void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index d42be880a236..4981e443a884 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -117,12 +117,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
>
>  static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>  {
> -       struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>         struct amdgpu_job *job = to_amdgpu_job(s_job);
>
>         drm_sched_job_cleanup(s_job);
>
> -       amdgpu_ring_priority_put(ring, s_job->s_priority);
>         dma_fence_put(job->fence);
>         amdgpu_sync_free(&job->sync);
>         amdgpu_sync_free(&job->sched_sync);
> @@ -143,7 +141,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
>                       void *owner, struct dma_fence **f)
>  {
>         enum drm_sched_priority priority;
> -       struct amdgpu_ring *ring;
>         int r;
>
>         if (!f)
> @@ -158,9 +155,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
>         priority = job->base.s_priority;
>         drm_sched_entity_push_job(&job->base, entity);
>
> -       ring = to_amdgpu_ring(entity->rq->sched);
> -       amdgpu_ring_priority_get(ring, priority);
> -
>         return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 24caff085d00..34fcd467f18d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -222,6 +222,7 @@ struct amdgpu_ring {
>         struct mutex            priority_mutex;
>         /* protected by priority_mutex */
>         int                     priority;
> +       bool                    high_priority;
>
>  #if defined(CONFIG_DEBUG_FS)
>         struct dentry *ent;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index e63f98b2d389..4260becd569b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -4426,6 +4426,23 @@ static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
>         return r;
>  }
>
> +static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
> +{
> +       struct amdgpu_device *adev = ring->adev;
> +
> +       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
> +               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
> +                       mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
> +                       ring->high_priority = true;
> +                       mqd->cp_hqd_queue_priority =
> +                               AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
> +                       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

I think we can move this back to mqd_init().  It needs to be set up
for both low and high priority queues.
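
i.e. keep the default programming in gfx_v8_0_mqd_init() for every queue
(sketch):

    /* defaults, programmed regardless of queue priority */
    mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

and let gfx_v8_0_mqd_set_priority() only touch the pipe/queue priority
fields for the high priority queues.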

> +               }
> +               else
> +                       ring->high_priority = false;
> +       }
> +}
> +
>  static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
>  {
>         struct amdgpu_device *adev = ring->adev;
> @@ -4549,9 +4566,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
>         /* defaults */
>         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
>         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
> -       mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
> -       mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
> -       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
>         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
>         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
>         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
> @@ -4563,6 +4577,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
>         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
>         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
>
> +       /* set static priority for a queue/ring */
> +       gfx_v8_0_mqd_set_priority(ring, mqd);
> +
>         /* map_queues packet doesn't need activate the queue,
>          * so only kiq need set this field.
>          */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 4135e4126e82..85a54849abd1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3310,6 +3310,23 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
>         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
>  }
>
> +static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
> +{
> +       struct amdgpu_device *adev = ring->adev;
> +
> +       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
> +               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
> +                       mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
> +                       ring->high_priority = true;
> +                       mqd->cp_hqd_queue_priority =
> +                               AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
> +                       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

Same comment here.

Alex

> +               }
> +               else
> +                       ring->high_priority = false;
> +       }
> +}
> +
>  static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
>  {
>         struct amdgpu_device *adev = ring->adev;
> @@ -3446,6 +3463,9 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
>         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
>         mqd->cp_hqd_ib_control = tmp;
>
> +       /* set static priority for a queue/ring */
> +       gfx_v9_0_mqd_set_priority(ring, mqd);
> +
>         /* map_queues packet doesn't need activate the queue,
>          * so only kiq need set this field.
>          */
> --
> 2.25.0
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

* Re: [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init
  2020-02-27 21:48 ` Alex Deucher
@ 2020-02-27 21:50   ` Alex Deucher
  2020-02-27 21:56     ` Das, Nirmoy
  0 siblings, 1 reply; 15+ messages in thread
From: Alex Deucher @ 2020-02-27 21:50 UTC (permalink / raw)
  To: Nirmoy Das
  Cc: Deucher, Alexander, Huang Rui, Nirmoy Das, Christian Koenig,
	amd-gfx list

On Thu, Feb 27, 2020 at 4:48 PM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Thu, Feb 27, 2020 at 4:37 PM Nirmoy Das <nirmoy.aiemd@gmail.com> wrote:
> >
> > We were changing compute ring priority while rings were being used
> > before every job submission which is not recommended. This patch
> > sets compute queue priority at mqd initialization for gfx8 and gfx9.
> >
> > Policy: Enable high priority compute queues only if gpu has >1 MEC, if
> > so PIPE0 and PIPE1 will be in high priority.
>
> I think we still want high priority queues on even when there is only
> one MEC.  It might work better on multi-MEC chips, but it should still
> work on single MEC chips.
>
> >
> > high/normal priority compute sched list are generated from set of high/normal
> > priority compute queues. At context creation, entity of compute queue
> > get a sched list from high or normal priority depending on ctx->priority
> >
> > Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 ----
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 19 +++++++++++++++----
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 14 ++++++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  | 12 ++++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  6 ------
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
> >  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    | 23 ++++++++++++++++++++---
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 20 ++++++++++++++++++++

Also, can you add a patch for gfx_v10_0.c?
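
For reference, the gfx10 variant would presumably mirror the gfx9 one
(untested sketch, assuming struct v10_compute_mqd exposes the same
cp_hqd_* fields):

    static void gfx_v10_0_mqd_set_priority(struct amdgpu_ring *ring,
                                           struct v10_compute_mqd *mqd)
    {
            struct amdgpu_device *adev = ring->adev;

            if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
                    return;

            if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
                    mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
                    mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
                    ring->high_priority = true;
            } else {
                    ring->high_priority = false;
            }
    }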

Alex

> >  8 files changed, 82 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index f397ff97b4e4..8304d0c87899 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -1205,7 +1205,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> >         struct drm_sched_entity *entity = p->entity;
> >         enum drm_sched_priority priority;
> > -       struct amdgpu_ring *ring;
> >         struct amdgpu_bo_list_entry *e;
> >         struct amdgpu_job *job;
> >         uint64_t seq;
> > @@ -1258,9 +1257,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >         priority = job->base.s_priority;
> >         drm_sched_entity_push_job(&job->base, entity);
> >
> > -       ring = to_amdgpu_ring(entity->rq->sched);
> > -       amdgpu_ring_priority_get(ring, priority);
> > -
> >         amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
> >
> >         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > index 94a6c42f29ea..a1742b1d4f9c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > @@ -85,8 +85,14 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const
> >                         num_scheds = 1;
> >                         break;
> >                 case AMDGPU_HW_IP_COMPUTE:
> > -                       scheds = adev->gfx.compute_sched;
> > -                       num_scheds = adev->gfx.num_compute_sched;
> > +                       if (priority > DRM_SCHED_PRIORITY_NORMAL &&
> > +                           adev->gfx.num_compute_sched_high) {
> > +                               scheds = adev->gfx.compute_sched_high;
> > +                               num_scheds = adev->gfx.num_compute_sched_high;
> > +                       } else {
> > +                               scheds = adev->gfx.compute_sched;
> > +                               num_scheds = adev->gfx.num_compute_sched;
> > +                       }
> >                         break;
> >                 case AMDGPU_HW_IP_DMA:
> >                         scheds = adev->sdma.sdma_sched;
> > @@ -638,8 +644,13 @@ void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
> >         }
> >
> >         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> > -               adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
> > -               adev->gfx.num_compute_sched++;
> > +               if (!adev->gfx.compute_ring[i].high_priority) {
> > +                       adev->gfx.compute_sched[adev->gfx.num_compute_sched++] =
> > +                               &adev->gfx.compute_ring[i].sched;
> > +               } else {
> > +                       adev->gfx.compute_sched_high[adev->gfx.num_compute_sched_high++] =
> > +                               &adev->gfx.compute_ring[i].sched;
> > +               }
> >         }
> >
> >         for (i = 0; i < adev->sdma.num_instances; i++) {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > index 7403588684b3..bdea5d44edf4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > @@ -192,6 +192,20 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
> >         return adev->gfx.mec.num_mec > 1;
> >  }
> >
> > +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
> > +                                              int queue)
> > +{
> > +       bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
> > +
> > +       /* only enable high priority queue if more than 1 MEC.
> > +        * When multipipe_policy is true amdgpu gets queue 0,1 from each pipe of
> > +        * 1st MEC. Policy: make queue 0 of each pipe as high priority compute queue */
> > +       if (multipipe_policy && queue == 0)
> > +               return true;
> > +
> > +       return false;
> > +}
> > +
> >  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
> >  {
> >         int i, queue, pipe, mec;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > index 37ba05b63b2a..13ee0ea66c6f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > @@ -41,6 +41,14 @@
> >  #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
> >  #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
> >
> > +#define AMDGPU_GFX_PIPE_PRIO_LOW    0
> > +#define AMDGPU_GFX_PIPE_PRIO_NORMAL 1
> > +#define AMDGPU_GFX_PIPE_PRIO_HIGH   2
> > +
> > +#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM  0
> > +#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM  15
> > +
> > +
> >  struct amdgpu_mec {
> >         struct amdgpu_bo        *hpd_eop_obj;
> >         u64                     hpd_eop_gpu_addr;
> > @@ -281,7 +289,9 @@ struct amdgpu_gfx {
> >         unsigned                        num_gfx_rings;
> >         struct amdgpu_ring              compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> >         struct drm_gpu_scheduler        *compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
> > +       struct drm_gpu_scheduler        *compute_sched_high[AMDGPU_MAX_COMPUTE_RINGS];
> >         uint32_t                        num_compute_sched;
> > +       uint32_t                        num_compute_sched_high;
> >         unsigned                        num_compute_rings;
> >         struct amdgpu_irq_src           eop_irq;
> >         struct amdgpu_irq_src           priv_reg_irq;
> > @@ -363,6 +373,8 @@ void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
> >                                  int *mec, int *pipe, int *queue);
> >  bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
> >                                      int pipe, int queue);
> > +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
> > +                                              int queue);
> >  int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
> >                                int pipe, int queue);
> >  void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > index d42be880a236..4981e443a884 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > @@ -117,12 +117,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
> >
> >  static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
> >  {
> > -       struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
> >         struct amdgpu_job *job = to_amdgpu_job(s_job);
> >
> >         drm_sched_job_cleanup(s_job);
> >
> > -       amdgpu_ring_priority_put(ring, s_job->s_priority);
> >         dma_fence_put(job->fence);
> >         amdgpu_sync_free(&job->sync);
> >         amdgpu_sync_free(&job->sched_sync);
> > @@ -143,7 +141,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
> >                       void *owner, struct dma_fence **f)
> >  {
> >         enum drm_sched_priority priority;
> > -       struct amdgpu_ring *ring;
> >         int r;
> >
> >         if (!f)
> > @@ -158,9 +155,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
> >         priority = job->base.s_priority;
> >         drm_sched_entity_push_job(&job->base, entity);
> >
> > -       ring = to_amdgpu_ring(entity->rq->sched);
> > -       amdgpu_ring_priority_get(ring, priority);
> > -
> >         return 0;
> >  }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > index 24caff085d00..34fcd467f18d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > @@ -222,6 +222,7 @@ struct amdgpu_ring {
> >         struct mutex            priority_mutex;
> >         /* protected by priority_mutex */
> >         int                     priority;
> > +       bool                    high_priority;
> >
> >  #if defined(CONFIG_DEBUG_FS)
> >         struct dentry *ent;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > index e63f98b2d389..4260becd569b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > @@ -4426,6 +4426,23 @@ static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
> >         return r;
> >  }
> >
> > +static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
> > +{
> > +       struct amdgpu_device *adev = ring->adev;
> > +
> > +       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
> > +               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
> > +                       mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
> > +                       ring->high_priority = true;
> > +                       mqd->cp_hqd_queue_priority =
> > +                               AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
> > +                       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
>
> I think we can move this back to mqd_init().  It needs to be set up
> for both low and high priority queues.
>
> > +               }
> > +               else
> > +                       ring->high_priority = false;
> > +       }
> > +}
> > +
> >  static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
> >  {
> >         struct amdgpu_device *adev = ring->adev;
> > @@ -4549,9 +4566,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
> >         /* defaults */
> >         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
> >         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
> > -       mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
> > -       mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
> > -       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
> >         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
> >         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
> >         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
> > @@ -4563,6 +4577,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
> >         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
> >         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
> >
> > +       /* set static priority for a queue/ring */
> > +       gfx_v8_0_mqd_set_priority(ring, mqd);
> > +
> >         /* map_queues packet doesn't need activate the queue,
> >          * so only kiq need set this field.
> >          */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 4135e4126e82..85a54849abd1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -3310,6 +3310,23 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
> >         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
> >  }
> >
> > +static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
> > +{
> > +       struct amdgpu_device *adev = ring->adev;
> > +
> > +       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
> > +               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
> > +                       mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
> > +                       ring->high_priority = true;
> > +                       mqd->cp_hqd_queue_priority =
> > +                               AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
> > +                       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
>
> Same comment here.
>
> Alex
>
> > +               }
> > +               else
> > +                       ring->high_priority = false;
> > +       }
> > +}
> > +
> >  static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
> >  {
> >         struct amdgpu_device *adev = ring->adev;
> > @@ -3446,6 +3463,9 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
> >         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
> >         mqd->cp_hqd_ib_control = tmp;
> >
> > +       /* set static priority for a queue/ring */
> > +       gfx_v9_0_mqd_set_priority(ring, mqd);
> > +
> >         /* map_queues packet doesn't need activate the queue,
> >          * so only kiq need set this field.
> >          */
> > --
> > 2.25.0
> >
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init
  2020-02-27 21:50   ` Alex Deucher
@ 2020-02-27 21:56     ` Das, Nirmoy
  0 siblings, 0 replies; 15+ messages in thread
From: Das, Nirmoy @ 2020-02-27 21:56 UTC (permalink / raw)
  To: Alex Deucher, Nirmoy Das
  Cc: Deucher, Alexander, Huang, Ray, Koenig, Christian, amd-gfx list



[AMD Official Use Only - Internal Distribution Only]

Yes, I missed gfx_v10_0.c. I will resend an updated one tomorrow.

Regards,
Nirmoy
________________________________
From: Alex Deucher <alexdeucher@gmail.com>
Sent: Thursday, February 27, 2020 10:50:35 PM
To: Nirmoy Das <nirmoy.aiemd@gmail.com>
Cc: amd-gfx list <amd-gfx@lists.freedesktop.org>; Deucher, Alexander <Alexander.Deucher@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Das, Nirmoy <Nirmoy.Das@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>
Subject: Re: [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init

On Thu, Feb 27, 2020 at 4:48 PM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> On Thu, Feb 27, 2020 at 4:37 PM Nirmoy Das <nirmoy.aiemd@gmail.com> wrote:
> >
> > We were changing compute ring priority while rings were being used
> > before every job submission which is not recommended. This patch
> > sets compute queue priority at mqd initialization for gfx8 and gfx9.
> >
> > Policy: Enable high priority compute queues only if gpu has >1 MEC, if
> > so PIPE0 and PIPE1 will be in high priority.
>
> I think we still want high priority queues on even when there is only
> one MEC.  It might work better on multi-MEC chips, but it should still
> work on single MEC chips.
>
> >
> > high/normal priority compute sched list are generated from set of high/normal
> > priority compute queues. At context creation, entity of compute queue
> > get a sched list from high or normal priority depending on ctx->priority
> >
> > Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  4 ----
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  | 19 +++++++++++++++----
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  | 14 ++++++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  | 12 ++++++++++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  |  6 ------
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
> >  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    | 23 ++++++++++++++++++++---
> >  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 20 ++++++++++++++++++++

Also, can you add a patch for gfx_v10_0.c?

Alex

> >  8 files changed, 82 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index f397ff97b4e4..8304d0c87899 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -1205,7 +1205,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >         struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> >         struct drm_sched_entity *entity = p->entity;
> >         enum drm_sched_priority priority;
> > -       struct amdgpu_ring *ring;
> >         struct amdgpu_bo_list_entry *e;
> >         struct amdgpu_job *job;
> >         uint64_t seq;
> > @@ -1258,9 +1257,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> >         priority = job->base.s_priority;
> >         drm_sched_entity_push_job(&job->base, entity);
> >
> > -       ring = to_amdgpu_ring(entity->rq->sched);
> > -       amdgpu_ring_priority_get(ring, priority);
> > -
> >         amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
> >
> >         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > index 94a6c42f29ea..a1742b1d4f9c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> > @@ -85,8 +85,14 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const
> >                         num_scheds = 1;
> >                         break;
> >                 case AMDGPU_HW_IP_COMPUTE:
> > -                       scheds = adev->gfx.compute_sched;
> > -                       num_scheds = adev->gfx.num_compute_sched;
> > +                       if (priority > DRM_SCHED_PRIORITY_NORMAL &&
> > +                           adev->gfx.num_compute_sched_high) {
> > +                               scheds = adev->gfx.compute_sched_high;
> > +                               num_scheds = adev->gfx.num_compute_sched_high;
> > +                       } else {
> > +                               scheds = adev->gfx.compute_sched;
> > +                               num_scheds = adev->gfx.num_compute_sched;
> > +                       }
> >                         break;
> >                 case AMDGPU_HW_IP_DMA:
> >                         scheds = adev->sdma.sdma_sched;
> > @@ -638,8 +644,13 @@ void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
> >         }
> >
> >         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> > -               adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
> > -               adev->gfx.num_compute_sched++;
> > +               if (!adev->gfx.compute_ring[i].high_priority) {
> > +                       adev->gfx.compute_sched[adev->gfx.num_compute_sched++] =
> > +                               &adev->gfx.compute_ring[i].sched;
> > +               } else {
> > +                       adev->gfx.compute_sched_high[adev->gfx.num_compute_sched_high++] =
> > +                               &adev->gfx.compute_ring[i].sched;
> > +               }
> >         }
> >
> >         for (i = 0; i < adev->sdma.num_instances; i++) {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > index 7403588684b3..bdea5d44edf4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> > @@ -192,6 +192,20 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
> >         return adev->gfx.mec.num_mec > 1;
> >  }
> >
> > +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
> > +                                              int queue)
> > +{
> > +       bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
> > +
> > +       /* only enable high priority queue if more than 1 MEC.
> > +        * When multipipe_policy is true amdgpu gets queue 0,1 from each pipe of
> > +        * 1st MEC. Policy: make queue 0 of each pipe as high priority compute queue */
> > +       if (multipipe_policy && queue == 0)
> > +               return true;
> > +
> > +       return false;
> > +}
> > +
> >  void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
> >  {
> >         int i, queue, pipe, mec;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > index 37ba05b63b2a..13ee0ea66c6f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > @@ -41,6 +41,14 @@
> >  #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
> >  #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
> >
> > +#define AMDGPU_GFX_PIPE_PRIO_LOW    0
> > +#define AMDGPU_GFX_PIPE_PRIO_NORMAL 1
> > +#define AMDGPU_GFX_PIPE_PRIO_HIGH   2
> > +
> > +#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM  0
> > +#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM  15
> > +
> > +
> >  struct amdgpu_mec {
> >         struct amdgpu_bo        *hpd_eop_obj;
> >         u64                     hpd_eop_gpu_addr;
> > @@ -281,7 +289,9 @@ struct amdgpu_gfx {
> >         unsigned                        num_gfx_rings;
> >         struct amdgpu_ring              compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> >         struct drm_gpu_scheduler        *compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
> > +       struct drm_gpu_scheduler        *compute_sched_high[AMDGPU_MAX_COMPUTE_RINGS];
> >         uint32_t                        num_compute_sched;
> > +       uint32_t                        num_compute_sched_high;
> >         unsigned                        num_compute_rings;
> >         struct amdgpu_irq_src           eop_irq;
> >         struct amdgpu_irq_src           priv_reg_irq;
> > @@ -363,6 +373,8 @@ void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
> >                                  int *mec, int *pipe, int *queue);
> >  bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
> >                                      int pipe, int queue);
> > +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
> > +                                              int queue);
> >  int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
> >                                int pipe, int queue);
> >  void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > index d42be880a236..4981e443a884 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> > @@ -117,12 +117,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
> >
> >  static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
> >  {
> > -       struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
> >         struct amdgpu_job *job = to_amdgpu_job(s_job);
> >
> >         drm_sched_job_cleanup(s_job);
> >
> > -       amdgpu_ring_priority_put(ring, s_job->s_priority);
> >         dma_fence_put(job->fence);
> >         amdgpu_sync_free(&job->sync);
> >         amdgpu_sync_free(&job->sched_sync);
> > @@ -143,7 +141,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
> >                       void *owner, struct dma_fence **f)
> >  {
> >         enum drm_sched_priority priority;
> > -       struct amdgpu_ring *ring;
> >         int r;
> >
> >         if (!f)
> > @@ -158,9 +155,6 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
> >         priority = job->base.s_priority;
> >         drm_sched_entity_push_job(&job->base, entity);
> >
> > -       ring = to_amdgpu_ring(entity->rq->sched);
> > -       amdgpu_ring_priority_get(ring, priority);
> > -
> >         return 0;
> >  }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > index 24caff085d00..34fcd467f18d 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> > @@ -222,6 +222,7 @@ struct amdgpu_ring {
> >         struct mutex            priority_mutex;
> >         /* protected by priority_mutex */
> >         int                     priority;
> > +       bool                    high_priority;
> >
> >  #if defined(CONFIG_DEBUG_FS)
> >         struct dentry *ent;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > index e63f98b2d389..4260becd569b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> > @@ -4426,6 +4426,23 @@ static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
> >         return r;
> >  }
> >
> > +static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
> > +{
> > +       struct amdgpu_device *adev = ring->adev;
> > +
> > +       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
> > +               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
> > +                       mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
> > +                       ring->high_priority = true;
> > +                       mqd->cp_hqd_queue_priority =
> > +                               AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
> > +                       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
>
> I think we can move this back to mqd_init().  It needs to be set up
> for both low and high priority queues.
>
> > +               }
> > +               else
> > +                       ring->high_priority = false;
> > +       }
> > +}
> > +
> >  static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
> >  {
> >         struct amdgpu_device *adev = ring->adev;
> > @@ -4549,9 +4566,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
> >         /* defaults */
> >         mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
> >         mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
> > -       mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
> > -       mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
> > -       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
> >         mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
> >         mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
> >         mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
> > @@ -4563,6 +4577,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
> >         mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
> >         mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
> >
> > +       /* set static priority for a queue/ring */
> > +       gfx_v8_0_mqd_set_priority(ring, mqd);
> > +
> >         /* map_queues packet doesn't need activate the queue,
> >          * so only kiq need set this field.
> >          */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > index 4135e4126e82..85a54849abd1 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> > @@ -3310,6 +3310,23 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
> >         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
> >  }
> >
> > +static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
> > +{
> > +       struct amdgpu_device *adev = ring->adev;
> > +
> > +       if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
> > +               if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) {
> > +                       mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
> > +                       ring->high_priority = true;
> > +                       mqd->cp_hqd_queue_priority =
> > +                               AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
> > +                       mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
>
> Same comment here.
>
> Alex
>
> > +               }
> > +               else
> > +                       ring->high_priority = false;
> > +       }
> > +}
> > +
> >  static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
> >  {
> >         struct amdgpu_device *adev = ring->adev;
> > @@ -3446,6 +3463,9 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
> >         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
> >         mqd->cp_hqd_ib_control = tmp;
> >
> > +       /* set static priority for a queue/ring */
> > +       gfx_v9_0_mqd_set_priority(ring, mqd);
> > +
> >         /* map_queues packet doesn't need activate the queue,
> >          * so only kiq need set this field.
> >          */
> > --
> > 2.25.0
> >
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 3/4] drm/amdgpu: change hw sched list on ctx priority override
  2020-02-27 21:40 ` [RFC PATCH 3/4] drm/amdgpu: change hw sched list on ctx priority override Nirmoy Das
@ 2020-02-28  4:17   ` Luben Tuikov
  0 siblings, 0 replies; 15+ messages in thread
From: Luben Tuikov @ 2020-02-28  4:17 UTC (permalink / raw)
  To: Nirmoy Das, amd-gfx
  Cc: alexander.deucher, Ray.Huang, nirmoy.das, christian.koenig

On 2020-02-27 4:40 p.m., Nirmoy Das wrote:
> Switch to appropriate sched list for an entity on priority override.
> 
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 54 ++++++++++++++++++++++++-
>  1 file changed, 53 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a1742b1d4f9c..69a791430b25 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -508,11 +508,53 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
>  	return fence;
>  }
>  
> +static int amdgpu_ctx_change_sched(struct amdgpu_ctx *ctx,
> +				   struct amdgpu_ctx_entity *aentity,
> +				   int hw_ip, enum drm_sched_priority priority)
> +{
> +	struct amdgpu_device *adev = ctx->adev;
> +	struct drm_gpu_scheduler **scheds = NULL;
> +	unsigned num_scheds = 0;
> +
> +	switch (hw_ip) {
> +		case AMDGPU_HW_IP_COMPUTE:

NAK. Don't indent the "case".

LKCS says that "case" must not be indented:
https://www.kernel.org/doc/html/v4.10/process/coding-style.html#indentation
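
For reference, a minimal sketch of the layout the LKCS asks for, reusing
the names from this patch (the compute branch shortened to the default
list just to show the indentation):

switch (hw_ip) {
case AMDGPU_HW_IP_COMPUTE:
	scheds = adev->gfx.compute_sched;
	num_scheds = adev->gfx.num_compute_sched;
	break;
default:
	return 0;
}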

> +			if (priority > DRM_SCHED_PRIORITY_NORMAL &&
> +			    adev->gfx.num_compute_sched_high) {
> +				scheds = adev->gfx.compute_sched_high;
> +				num_scheds = adev->gfx.num_compute_sched_high;
> +			} else {
> +				scheds = adev->gfx.compute_sched;
> +				num_scheds = adev->gfx.num_compute_sched;
> +			}

I feel that this is a regression in that we're having an if-conditional
inside a switch-case. Could we just use maps to execute without
any branching?

Surely priority could be used as an index into a map of DRM_SCHED_PRIORITY_MAX
to find out which scheduler to use and the number thereof.
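
As an illustration only (the *_by_prio names below are made up and are not
part of this patch), the lookup could be prepared once at
amdgpu_ctx_init_sched() time and then indexed directly:

/* hypothetical members of struct amdgpu_gfx, filled once at init:
 * each entry points at the normal or high priority scheduler array */
struct drm_gpu_scheduler **compute_sched_by_prio[DRM_SCHED_PRIORITY_MAX];
unsigned int num_compute_sched_by_prio[DRM_SCHED_PRIORITY_MAX];

/* the switch body above then becomes two assignments, no branching */
scheds = adev->gfx.compute_sched_by_prio[priority];
num_scheds = adev->gfx.num_compute_sched_by_prio[priority];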

> +			break;
> +		default:
> +			return 0;
> +	}


> +
> +	return drm_sched_entity_modify_sched(&aentity->entity, scheds, num_scheds);
> +}
> +
> +static int amdgpu_ctx_hw_priority_override(struct amdgpu_ctx *ctx,
> +					    const u32 hw_ip,
> +					    enum drm_sched_priority priority)
> +{
> +	int r = 0, i;
> +
> +	for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
> +		if (!ctx->entities[hw_ip][i])
> +			continue;
> +		r = amdgpu_ctx_change_sched(ctx, ctx->entities[hw_ip][i],
> +					    hw_ip, priority);
> +	}
> +
> +	return r;
> +}
> +
>  void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>  				  enum drm_sched_priority priority)
>  {
>  	enum drm_sched_priority ctx_prio;
> -	unsigned i, j;
> +	unsigned r, i, j;
>  
>  	ctx->override_priority = priority;
>  
> @@ -521,11 +563,21 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>  	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
>  		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
>  			struct drm_sched_entity *entity;
> +			struct amdgpu_ring *ring;
>  
>  			if (!ctx->entities[i][j])
>  				continue;
>  
>  			entity = &ctx->entities[i][j]->entity;
> +			ring = to_amdgpu_ring(entity->rq->sched);
> +
> +			if (ring->high_priority) {
> +				r = amdgpu_ctx_hw_priority_override(ctx, i,
> +								    ctx_prio);
> +				if (r)
> +					DRM_ERROR("Failed to override HW priority for %s",
> +						  ring->name);
> +			}

I can't see this as an improvement when we add branching inside a for-loop.
Perhaps we could remove the if-conditionals and use indexing to eliminate
branching?

Regards,
Luben

>  			drm_sched_entity_set_priority(entity, ctx_prio);
>  		}
>  	}
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list
  2020-02-27 21:40 ` [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list Nirmoy Das
@ 2020-02-28  5:08   ` Luben Tuikov
  2020-02-28  7:47     ` Christian König
  0 siblings, 1 reply; 15+ messages in thread
From: Luben Tuikov @ 2020-02-28  5:08 UTC (permalink / raw)
  To: Nirmoy Das, amd-gfx
  Cc: alexander.deucher, Ray.Huang, nirmoy.das, christian.koenig

On 2020-02-27 4:40 p.m., Nirmoy Das wrote:
> implement drm_sched_entity_modify_sched() which can modify existing
> sched_list with a different one. This is going to be helpful when
> userspace changes priority of a ctx/entity then driver can switch to
> corresponding hw shced list for that priority
> 
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>  drivers/gpu/drm/scheduler/sched_entity.c | 24 ++++++++++++++++++++++++
>  include/drm/gpu_scheduler.h              |  4 ++++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 63bccd201b97..711e9d504bcb 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -83,6 +83,30 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
>  }
>  EXPORT_SYMBOL(drm_sched_entity_init);
>  
> +/**
> + * drm_sched_entity_modify_sched - Modify sched of an entity
> + *
> + * @entity: scheduler entity to init
> + * @sched_list: the list of new drm scheds which will replace
> + *		existing entity->sched_list
> + * @num_sched_list: number of drm sched in sched_list
> + *
> + * Returns 0 on success or a negative error code on failure.
> + */
> +int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
> +				  struct drm_gpu_scheduler **sched_list,
> +				  unsigned int num_sched_list)
> +{
> +	if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
> +		return -EINVAL;

This seems unmaintainable. I'd write it in its natural form:

int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
				  struct drm_gpu_scheduler **sched_list,
				  unsigned int num_sched_list)
{
	if (entity && sched_list && (num_sched_list == 0 || sched_list[0] != NULL)) {
		entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
		entity->num_sched_list = num_sched_list;
		return 0;
	} else {
		return -EINVAL;
	}
}

That's too heavy. Can we improve the architecture
so we don't have to check for this in leaf functions like this one?
We can just return a parameterization.

Why would this be called with entity being NULL?
Or with sched_list being NULL? Or num_sched_list being not zero
yet sched_list[0] being NULL? Why not make sure that sched_list[0] is
never NULL and that num_sched_list is greater than 0 always?

Does this make it to user space?
Why would the state of execution be one such that this is true/false
for the code to return -EINVAL?
From patch 3/4 it seems that an error is printed inside amdgpu_ctx_priority_override()
and the result is not propagated up the stack.

I think we should improve the code so that this condition above
is never true. Then we can use parameterization for those two
statements below:

> +
> +	entity->sched_list = num_sched_list > 1 ? sched_list : NULL;

So if we're here, we know from the top conditional that
either num_sched_list is 0 or that sched_list[0] not NULL
or both.

So if num_sched_list is 0 or 1 we return NULL?
And if num_sched_list is 2 or greater we return sched_list
of which sched_list[0] could be NULL?

Why not fix the architecture so that this is simply copied? (In which case
we can index-parameterize it and simply copy it.)
Why are there so many checks everywhere?

> +	entity->num_sched_list = num_sched_list;
> +

I mean, all we're doing in this function is initializing
entity->sched_list and entity->num_sched_list. Why does this
function have to be so complex and do so many checks?
Can we improve/fix the architecture instead?

Regards,
Luben


> +	return 0;
> +}
> +EXPORT_SYMBOL(drm_sched_entity_modify_sched);
> +
>  /**
>   * drm_sched_entity_is_idle - Check if entity is idle
>   *
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 589be851f8a1..0c164a96d51b 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -297,6 +297,10 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched);
>  int drm_sched_job_init(struct drm_sched_job *job,
>  		       struct drm_sched_entity *entity,
>  		       void *owner);
> +int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
> +				  struct drm_gpu_scheduler **sched_list,
> +                                  unsigned int num_sched_list);
> +
>  void drm_sched_job_cleanup(struct drm_sched_job *job);
>  void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
>  void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list
  2020-02-28  5:08   ` Luben Tuikov
@ 2020-02-28  7:47     ` Christian König
  2020-02-28  9:30       ` Nirmoy
  2020-03-02 20:47       ` Luben Tuikov
  0 siblings, 2 replies; 15+ messages in thread
From: Christian König @ 2020-02-28  7:47 UTC (permalink / raw)
  To: Luben Tuikov, Nirmoy Das, amd-gfx
  Cc: alexander.deucher, Ray.Huang, nirmoy.das

Am 28.02.20 um 06:08 schrieb Luben Tuikov:
> On 2020-02-27 4:40 p.m., Nirmoy Das wrote:
>> implement drm_sched_entity_modify_sched() which can modify existing
>> sched_list with a different one. This is going to be helpful when
>> userspace changes priority of a ctx/entity then driver can switch to
>> corresponding hw shced list for that priority
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>> ---
>>   drivers/gpu/drm/scheduler/sched_entity.c | 24 ++++++++++++++++++++++++
>>   include/drm/gpu_scheduler.h              |  4 ++++
>>   2 files changed, 28 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
>> index 63bccd201b97..711e9d504bcb 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -83,6 +83,30 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
>>   }
>>   EXPORT_SYMBOL(drm_sched_entity_init);
>>   
>> +/**
>> + * drm_sched_entity_modify_sched - Modify sched of an entity
>> + *
>> + * @entity: scheduler entity to init
>> + * @sched_list: the list of new drm scheds which will replace
>> + *		existing entity->sched_list
>> + * @num_sched_list: number of drm sched in sched_list
>> + *
>> + * Returns 0 on success or a negative error code on failure.
>> + */
>> +int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
>> +				  struct drm_gpu_scheduler **sched_list,
>> +				  unsigned int num_sched_list)
>> +{
>> +	if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
>> +		return -EINVAL;
> This seems unmaintainable. I'd write it in its natural form:

This is probably just copy & pasted from the init function and complete 
overkill here.

>
> int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
> 				  struct drm_gpu_scheduler **sched_list,
> 				  unsigned int num_sched_list)
> {
> 	if (entity && sched_list && (num_sched_list == 0 || sched_list[0] != NULL)) {
> 		entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
> 		entity->num_sched_list = num_sched_list;
> 		return 0;
> 	} else {
> 		return -EINVAL;
> 	}
> }

Actually that's a rather bad idea. Error handling should always be in 
the form of:

if (check_error || missing_prerequisite)
     return_or_goto_cleanup;
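
For this particular helper that shape would be roughly (only the check
structure is the point here, not the final code):

	if (!entity || !sched_list || !num_sched_list)
		return -EINVAL;

	/* ...then do the two assignments and return 0 */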

> That's too heavy. Can we improve the architecture
> so we don't have to check for this in leaf functions like this one?
> We can just return a parameterization.
>
> Why would this be called with entity being NULL?
> Or with sched_list being NULL? Or num_sched_list being not zero
> yet sched_list[0] being NULL? Why not make sure that sched_list[0] is
> never NULL and that num_sched_list is greater than 0 always?
>
> Does this make it to user space?
> Why would the state of execution be one such that this is true/false
> for the code to return -EINVAL?
>  From patch 3/4 it seems that an error is printed inside amdgpu_ctx_priority_override()
> and the result is not propagated up the stack.
>
> I think we should improve the code where here this condition above
> is never true. Then we can use parameterization for those two
> statements below:
>
>> +
>> +	entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
> So if we're here, we know from the top conditional that
> either num_sched_list is 0 or that sched_list[0] not NULL
> or both.
>
> So if num_sched_list is 0 or 1 we return NULL?
> And if num_sched_list is 2 or greater we return sched_list
> of which sched_list[0] could be NULL?

This is also copy&pasted from the init code and completely wrong here.

What we should do instead is just: WARN_ON(!num_sched_list || !sched_list);

And do the check for not keeping the scheduler list around in the
init function.
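
Put together, a rough sketch of what I mean (an illustration of the idea,
not the actual resend):

int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
				  struct drm_gpu_scheduler **sched_list,
				  unsigned int num_sched_list)
{
	/* in-kernel callers must always hand in a non-empty list */
	WARN_ON(!num_sched_list || !sched_list);

	entity->sched_list = sched_list;
	entity->num_sched_list = num_sched_list;

	return 0;
}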

> Why not fix the architecture so that this is simply copied?

We had that and moved away from it because the scheduler list is 
actually const and shouldn't be allocated with each entity (which we can 
easily have thousands of).

Regards,
Christian.

>   (in which case
> we can simply index-parameterize it and simply copy it.
> Why are there so many checks everywhere?
>
>> +	entity->num_sched_list = num_sched_list;
>> +
> I mean, all we're doing in this function is initializing
> entity->sched_list and entity->num_sched_list. Why does this
> function have to be so complex and do so many checks?
> Can we improve/fix the architecture instead?
>
> Regards,
> Luben
>
>
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL(drm_sched_entity_modify_sched);
>> +
>>   /**
>>    * drm_sched_entity_is_idle - Check if entity is idle
>>    *
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index 589be851f8a1..0c164a96d51b 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -297,6 +297,10 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched);
>>   int drm_sched_job_init(struct drm_sched_job *job,
>>   		       struct drm_sched_entity *entity,
>>   		       void *owner);
>> +int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
>> +				  struct drm_gpu_scheduler **sched_list,
>> +                                  unsigned int num_sched_list);
>> +
>>   void drm_sched_job_cleanup(struct drm_sched_job *job);
>>   void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
>>   void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list
  2020-02-28  7:47     ` Christian König
@ 2020-02-28  9:30       ` Nirmoy
  2020-03-02 20:47       ` Luben Tuikov
  1 sibling, 0 replies; 15+ messages in thread
From: Nirmoy @ 2020-02-28  9:30 UTC (permalink / raw)
  To: Christian König, Luben Tuikov, Nirmoy Das, amd-gfx
  Cc: alexander.deucher, Ray.Huang, nirmoy.das


On 2/28/20 8:47 AM, Christian König wrote:
> Am 28.02.20 um 06:08 schrieb Luben Tuikov:
>> On 2020-02-27 4:40 p.m., Nirmoy Das wrote:
>>> implement drm_sched_entity_modify_sched() which can modify existing
>>> sched_list with a different one. This is going to be helpful when
>>> userspace changes priority of a ctx/entity then driver can switch to
>>> corresponding hw shced list for that priority
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>> ---
>>>   drivers/gpu/drm/scheduler/sched_entity.c | 24 
>>> ++++++++++++++++++++++++
>>>   include/drm/gpu_scheduler.h              |  4 ++++
>>>   2 files changed, 28 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c 
>>> b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index 63bccd201b97..711e9d504bcb 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -83,6 +83,30 @@ int drm_sched_entity_init(struct drm_sched_entity 
>>> *entity,
>>>   }
>>>   EXPORT_SYMBOL(drm_sched_entity_init);
>>>   +/**
>>> + * drm_sched_entity_modify_sched - Modify sched of an entity
>>> + *
>>> + * @entity: scheduler entity to init
>>> + * @sched_list: the list of new drm scheds which will replace
>>> + *        existing entity->sched_list
>>> + * @num_sched_list: number of drm sched in sched_list
>>> + *
>>> + * Returns 0 on success or a negative error code on failure.
>>> + */
>>> +int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
>>> +                  struct drm_gpu_scheduler **sched_list,
>>> +                  unsigned int num_sched_list)
>>> +{
>>> +    if (!(entity && sched_list && (num_sched_list == 0 || 
>>> sched_list[0])))
>>> +        return -EINVAL;
>> This seems unmaintainable. I'd write it in its natural form:
>
> This is probably just copy & pasted from the init function and 
> complete overkill here.
Was indeed lame copy paste from my side.
>
>>
>> So if num_sched_list is 0 or 1 we return NULL?
>> And if num_sched_list is 2 or greater we return sched_list
>> of which sched_list[0] could be NULL?
>
> This is also copy&pasted from the init code and completely wrong here.
>
> What we should do instead is just: WARN_ON(!num_sched_list || 
> !sched_list);

Thanks, this will make it much simpler. I will have this in the next version.

Nirmoy


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list
  2020-02-28  7:47     ` Christian König
  2020-02-28  9:30       ` Nirmoy
@ 2020-03-02 20:47       ` Luben Tuikov
  2020-03-03 19:06         ` Christian König
  1 sibling, 1 reply; 15+ messages in thread
From: Luben Tuikov @ 2020-03-02 20:47 UTC (permalink / raw)
  To: Christian König, Nirmoy Das, amd-gfx
  Cc: alexander.deucher, luben.tuikov, Ray.Huang, nirmoy.das

On 2020-02-28 2:47 a.m., Christian König wrote:
> Am 28.02.20 um 06:08 schrieb Luben Tuikov:
>> On 2020-02-27 4:40 p.m., Nirmoy Das wrote:
>>> implement drm_sched_entity_modify_sched() which can modify existing
>>> sched_list with a different one. This is going to be helpful when
>>> userspace changes priority of a ctx/entity then driver can switch to
>>> corresponding hw shced list for that priority
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>> ---
>>>   drivers/gpu/drm/scheduler/sched_entity.c | 24 ++++++++++++++++++++++++
>>>   include/drm/gpu_scheduler.h              |  4 ++++
>>>   2 files changed, 28 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
>>> index 63bccd201b97..711e9d504bcb 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>>> @@ -83,6 +83,30 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
>>>   }
>>>   EXPORT_SYMBOL(drm_sched_entity_init);
>>>   
>>> +/**
>>> + * drm_sched_entity_modify_sched - Modify sched of an entity
>>> + *
>>> + * @entity: scheduler entity to init
>>> + * @sched_list: the list of new drm scheds which will replace
>>> + *		existing entity->sched_list
>>> + * @num_sched_list: number of drm sched in sched_list
>>> + *
>>> + * Returns 0 on success or a negative error code on failure.
>>> + */
>>> +int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
>>> +				  struct drm_gpu_scheduler **sched_list,
>>> +				  unsigned int num_sched_list)
>>> +{
>>> +	if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
>>> +		return -EINVAL;
>> This seems unmaintainable. I'd write it in its natural form:
> 
> This is probably just copy & pasted from the init function and complete 
> overkill here.

It should be an easy rejection then. Statements like this make
the code unmaintainable. Regardless of whether it was copy-and-pasted
I wanted to emphasize the lack of simplification of what
was being done.

We need to put intention and sense into what we're doing, scrutinizing
every line we put into a patch. This is why I suggested
the simplification here:

> 
>>
>> int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
>> 				  struct drm_gpu_scheduler **sched_list,
>> 				  unsigned int num_sched_list)
>> {
>> 	if (entity && sched_list && (num_sched_list == 0 || sched_list[0] != NULL)) {
>> 		entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
>> 		entity->num_sched_list = num_sched_list;
>> 		return 0;
>> 	} else {
>> 		return -EINVAL;
>> 	}
>> }
> 
> Actually that's a rather bad idea. Error handling should always be in 

I actually don't think that it is a "rather bad idea". At all.
I actually think that it makes this leaf function more clear to
understand as the conditional would read like a sentence in prose.

> the form of:
> 
> if (check_error || missing_prerequisite)
>      return_or_goto_cleanup;

I don't think we should generalize across the board. We should be
more flexible in order to create clear and maintainable code.

> 
>> That's too heavy. Can we improve the architecture
>> so we don't have to check for this in leaf functions like this one?
>> We can just return a parameterization.
>>
>> Why would this be called with entity being NULL?
>> Or with sched_list being NULL? Or num_sched_list being not zero
>> yet sched_list[0] being NULL? Why not make sure that sched_list[0] is
>> never NULL and that num_sched_list is greater than 0 always?
>>
>> Does this make it to user space?
>> Why would the state of execution be one such that this is true/false
>> for the code to return -EINVAL?
>>  From patch 3/4 it seems that an error is printed inside amdgpu_ctx_priority_override()
>> and the result is not propagated up the stack.
>>
>> I think we should improve the code where here this condition above
>> is never true. Then we can use parameterization for those two
>> statements below:
>>
>>> +
>>> +	entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
>> So if we're here, we know from the top conditional that
>> either num_sched_list is 0 or that sched_list[0] not NULL
>> or both.
>>
>> So if num_sched_list is 0 or 1 we return NULL?
>> And if num_sched_list is 2 or greater we return sched_list
>> of which sched_list[0] could be NULL?
> 
> This is also copy&pasted from the init code and completely wrong here.

So even more reasons to reject this patch.

> 
> What we should do instead is just: WARN_ON(!num_sched_list || !sched_list);

Again, what does that *mean*? What does the check mean and what
does the num_sched_list == 0 or sched_list == NULL mean?
And how did we get into a situation like this where either or both
could be nil?

Wouldn't it be better to simplify or re-architecture this (we only recently
decided to hide physical rings from user-space), so that the code
is elegant (meaning no if-else) yet flexible and straightforward?

> 
> And to the checking for not keeping around the scheduler list in the 
> init function.
> 
>> Why not fix the architecture so that this is simply copied?
> 
> We had that and moved away from it because the scheduler list is 
> actually const and shouldn't be allocated with each entity (which we can 
> easily have thousands of).

I think that peppering the code with if-else conditionals
everywhere as these patch-series into the DRM scheduler have been,
would make the code unmaintainable in the long run.

Regards,
Luben

> 
> Regards,
> Christian.
> 
>>   (in which case
>> we can simply index-parameterize it and simply copy it.
>> Why are there so many checks everywhere?
>>
>>> +	entity->num_sched_list = num_sched_list;
>>> +
>> I mean, all we're doing in this function is initializing
>> entity->sched_list and entity->num_sched_list. Why does this
>> function have to be so complex and do so many checks?
>> Can we improve/fix the architecture instead?
>>
>> Regards,
>> Luben
>>
>>
>>> +	return 0;
>>> +}
>>> +EXPORT_SYMBOL(drm_sched_entity_modify_sched);
>>> +
>>>   /**
>>>    * drm_sched_entity_is_idle - Check if entity is idle
>>>    *
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index 589be851f8a1..0c164a96d51b 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -297,6 +297,10 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched);
>>>   int drm_sched_job_init(struct drm_sched_job *job,
>>>   		       struct drm_sched_entity *entity,
>>>   		       void *owner);
>>> +int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
>>> +				  struct drm_gpu_scheduler **sched_list,
>>> +                                  unsigned int num_sched_list);
>>> +
>>>   void drm_sched_job_cleanup(struct drm_sched_job *job);
>>>   void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
>>>   void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad);
>>>
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list
  2020-03-02 20:47       ` Luben Tuikov
@ 2020-03-03 19:06         ` Christian König
  2020-03-03 20:04           ` Luben Tuikov
  0 siblings, 1 reply; 15+ messages in thread
From: Christian König @ 2020-03-03 19:06 UTC (permalink / raw)
  To: Luben Tuikov, Nirmoy Das, amd-gfx
  Cc: alexander.deucher, Ray.Huang, nirmoy.das

Am 02.03.20 um 21:47 schrieb Luben Tuikov:
> On 2020-02-28 2:47 a.m., Christian König wrote:
>> Am 28.02.20 um 06:08 schrieb Luben Tuikov:
>>> On 2020-02-27 4:40 p.m., Nirmoy Das wrote:
>>>> [SNIP]
>>>> +	if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
>>>> +		return -EINVAL;
>>> This seems unmaintainable. I'd write it in its natural form:
>> This is probably just copy & pasted from the init function and complete
>> overkill here.
> It should be an easy rejection then. Statements like this make
> the code unmaintainable. Regardless of whether it was copy-and-pasted
> I wanted to emphasize the lack of simplification of what
> was being done.

The problem is even deeper. As you noticed as well, this is checking for
an in-kernel coding error and not application errors.

That check shouldn't have been in the init function in the first place, 
but nobody had time to look into that so far.

> We need to put intention and sense into what we're doing, scrutinizing
> every line we put into a patch. This is why I suggested
> the simplification here:
>
>>> int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
>>> 				  struct drm_gpu_scheduler **sched_list,
>>> 				  unsigned int num_sched_list)
>>> {
>>> 	if (entity && sched_list && (num_sched_list == 0 || sched_list[0] != NULL)) {
>>> 		entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
>>> 		entity->num_sched_list = num_sched_list;
>>> 		return 0;
>>> 	} else {
>>> 		return -EINVAL;
>>> 	}
>>> }
>> Actually that's a rather bad idea. Error handling should always be in
> I actually don't think that it is a "rather bad idea". At all.
> I actually think that it makes this leaf function more clear to
> understand as the conditional would read like a sentence in prose.

The condition is indeed easier to read, but at the cost of the early
return and of keeping prerequisite checking out of the code.

> [SNIP]
>> What we should do instead is just: WARN_ON(!num_sched_list || !sched_list);
> Again, what does that *mean*? What does the check mean and what
> does the num_sched_list == 0 or sched_list == NULL mean?
> And how did we get into a situation like this where either or both
> could be nil?

It's an in kernel coding error to do this. The caller should at least 
always provide a list with some entries in it.

A WARN_ON() is appropriate since it helps to narrow down the incorrect 
behavior following from that.

> Wouldn't it be better to simplify or re-architecture this (we only recently
> decided to hide physical rings from user-space), so that the code
> is elegant (meaning no if-else) yet flexible and straightforward?

That was not recent at all; hiding physical rings was done nearly 5 
years ago, shortly after the driver was initially released.

>>> Why not fix the architecture so that this is simply copied?
>> We had that and moved away from it because the scheduler list is
>> actually const and shouldn't be allocated with each entity (which we can
>> easily have thousands of).
> I think that peppering the code with if-else conditionals
> everywhere as these patch-series into the DRM scheduler have been,
> would make the code unmaintainable in the long run.

That's something I can agree on. Using a switch to map the priority to 
the backend implementation seems like the best idea to me.

E.g. the function amdgpu_to_sched_priority() should not only map the IOCTL 
values to the scheduler values, but also return the array of hw rings 
to use.
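
Just to illustrate the idea (the helper below is hypothetical, its name and
placement are made up), something central like this would keep the if/else
out of the ctx code:

static struct drm_gpu_scheduler **
amdgpu_gfx_compute_sched_list(struct amdgpu_device *adev,
			      enum drm_sched_priority prio,
			      unsigned int *num_scheds)
{
	/* pick the high priority list only when it was actually set up */
	if (prio > DRM_SCHED_PRIORITY_NORMAL &&
	    adev->gfx.num_compute_sched_high) {
		*num_scheds = adev->gfx.num_compute_sched_high;
		return adev->gfx.compute_sched_high;
	}

	*num_scheds = adev->gfx.num_compute_sched;
	return adev->gfx.compute_sched;
}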

Regards,
Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list
  2020-03-03 19:06         ` Christian König
@ 2020-03-03 20:04           ` Luben Tuikov
  0 siblings, 0 replies; 15+ messages in thread
From: Luben Tuikov @ 2020-03-03 20:04 UTC (permalink / raw)
  To: Christian König, Nirmoy Das, amd-gfx
  Cc: alexander.deucher, Ray.Huang, nirmoy.das

On 2020-03-03 14:06, Christian König wrote:
> Am 02.03.20 um 21:47 schrieb Luben Tuikov:
>> On 2020-02-28 2:47 a.m., Christian König wrote:
>>> Am 28.02.20 um 06:08 schrieb Luben Tuikov:
>>>> On 2020-02-27 4:40 p.m., Nirmoy Das wrote:
>>>>> [SNIP]
>>>>> +	if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
>>>>> +		return -EINVAL;
>>>> This seems unmaintainable. I'd write it in its natural form:
>>> This is probably just copy & pasted from the init function and complete
>>> overkill here.
>> It should be an easy rejection then. Statements like this make
>> the code unmaintainable. Regardless of whether it was copy-and-pasted
>> I wanted to emphasize the lack of simplification of what
>> was being done.
> 
> The problem is even deeper. As you noticed as well this is checking for 
> in kernel coding error and not application errors.

We shouldn't check the in-kernel implementation itself. If the kernel
did this everywhere, it'd be twice the size. The kernel shouldn't be
second-guessing itself.

Another way to see this is to ask "what would the kernel do here
if XYZ was NULL", for instance?  For userspace, it's clear. For
the kernel, it shows inconsistent and incomplete implmentation--something
which should be fixed.

Another way of seeing this is: if you break out a function into separate
smaller functions, checking the input parameters in those leaf/helper
functions (code which had been part of the original big function) would
make the code larger and make it second-guess itself.

That check shouldn't be there. The implementation should be consistent
and complete in order to yield elegant code.

> 
> That check shouldn't have been in the init function in the first place, 
> but nobody had time to look into that so far.
> 
>> We need to put intention and sense into what we're doing, scrutinizing
>> every line we put into a patch. This is why I suggested
>> the simplification here:
>>
>>>> int drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
>>>> 				  struct drm_gpu_scheduler **sched_list,
>>>> 				  unsigned int num_sched_list)
>>>> {
>>>> 	if (entity && sched_list && (num_sched_list == 0 || sched_list[0] != NULL)) {
>>>> 		entity->sched_list = num_sched_list > 1 ? sched_list : NULL;
>>>> 		entity->num_sched_list = num_sched_list;
>>>> 		return 0;
>>>> 	} else {
>>>> 		return -EINVAL;
>>>> 	}
>>>> }
>>> Actually that's a rather bad idea. Error handling should always be in
>> I actually don't think that it is a "rather bad idea". At all.
>> I actually think that it makes this leaf function more clear to
>> understand as the conditional would read like a sentence in prose.
> 
> The condition is indeed easier to read, but for the sacrifice of earlier 
> return and keeping prerequisite checking out of the code.

You know the compiler can reorder such code and invert
this simple-for-the-compiler conditional. It is easier
for human readers who might need to maintain it.

For instance, when you asked "Do you guys remember why we
might not have a job here?" for amdgpu_ib_schedule()
in a recent email--if that code had been split into code for 
test purposes and real IB processing code, then that question
wouldn't even be on the table.

We need to achieve a balance of breaking out code and if-else
statements. At the moment the code shows everything bunched
up into a single function and a ton of if-else statements,
with the pretense "to avoid duplication". Such duplication can be
avoided architecturally by redefining structures, what's in them,
and what arguments functions take.

> 
>> [SNIP]
>>> What we should do instead is just: WARN_ON(!num_sched_list || !sched_list);
>> Again, what does that *mean*? What does the check mean and what
>> does the num_sched_list == 0 or sched_list == NULL mean?
>> And how did we get into a situation like this where either or both
>> could be nil?
> 
> It's an in kernel coding error to do this. The caller should at least 
> always provide a list with some entries in it.
> 
> A WARN_ON() is appropriate since it helps to narrows down the incorrect 
> behavior following from that.
> 
>> Wouldn't it be better to simplify or re-architecture this (we only recently
>> decided to hide physical rings from user-space), so that the code
>> is elegant (meaning no if-else) yet flexible and straightforward?
> 
> That was not recently at all, hiding physical rings was done nearly 5 
> years ago shortly after the driver was initially released.
> 
>>>> Why not fix the architecture so that this is simply copied?
>>> We had that and moved away from it because the scheduler list is
>>> actually const and shouldn't be allocated with each entity (which we can
>>> easily have thousands of).
>> I think that peppering the code with if-else conditionals
>> everywhere as these patch-series into the DRM scheduler have been,
>> would make the code unmaintainable in the long run.
> 
> That's something I can agree on. Using a switch to map the priority to 
> the backend implementation seems like the best idea to me.

Do you mean you don't agree with anything I write? (If you have
to make a point of it here that *that* is something you can agree on?

I'd say, even a function is too heavy. A straightforward map should
be used, no function call, no switch (if-else in disguise), just
a map for instant lookup.

Regards,
Luben


> 
> E.g. function amdgpu_to_sched_priority() should not only map the IOCTL 
> values to the scheduler values, but also return the array which hw rings 
> to use.
> 
> Regards,
> Christian.
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-03-03 20:04 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-27 21:40 [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init Nirmoy Das
2020-02-27 21:40 ` [RFC PATCH 2/4] drm/scheduler: implement a function to modify sched list Nirmoy Das
2020-02-28  5:08   ` Luben Tuikov
2020-02-28  7:47     ` Christian König
2020-02-28  9:30       ` Nirmoy
2020-03-02 20:47       ` Luben Tuikov
2020-03-03 19:06         ` Christian König
2020-03-03 20:04           ` Luben Tuikov
2020-02-27 21:40 ` [RFC PATCH 3/4] drm/amdgpu: change hw sched list on ctx priority override Nirmoy Das
2020-02-28  4:17   ` Luben Tuikov
2020-02-27 21:40 ` [RFC PATCH 4/4] drm/amdgpu: remove unused functions Nirmoy Das
2020-02-27 21:42 ` [RFC PATCH 1/4] drm/amdgpu: set compute queue priority at mqd_init Nirmoy
2020-02-27 21:48 ` Alex Deucher
2020-02-27 21:50   ` Alex Deucher
2020-02-27 21:56     ` Das, Nirmoy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.