[PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2
@ 2017-04-21 23:48 Andres Rodriguez
       [not found] ` <20170421234836.18620-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Andres Rodriguez @ 2017-04-21 23:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

V2 updates:
 * Rebased
 * All patches now have r-b

Second part of the split of the series:
Add support for high priority scheduling in amdgpu v8

These patches should be close to being good enough to land.

The first two patches are simple fixes I've ported from the ROCm branch. These
still need review.

I've fixed all of Christian's comments for patch 04:
drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/6] drm/amdgpu: condense mqd programming sequence
       [not found] ` <20170421234836.18620-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-21 23:48   ` Andres Rodriguez
  2017-04-21 23:48   ` [PATCH 2/6] drm/amdgpu: workaround tonga HW bug in HQD " Andres Rodriguez
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Andres Rodriguez @ 2017-04-21 23:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

The MQD structure matches the reg layout. Take advantage of this to
simplify HQD programming.

Note that the ACTIVE field still needs to be programmed last.

Suggested-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 44 +++++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 84 +++++------------------------------
 2 files changed, 23 insertions(+), 105 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index d5209d0..e621372 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3120,81 +3120,59 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
 	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
 	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
 	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
 	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
 	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
 	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
 	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
 	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
 	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
 	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
 	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
 	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
 
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 }
 
 int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
 {
-	u32 tmp;
+	uint32_t tmp;
+	uint32_t mqd_reg;
+	uint32_t *mqd_data;
+
+	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
+	mqd_data = &mqd->cp_mqd_base_addr_lo;
 
 	/* disable wptr polling */
 	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
 	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
-	/* program MQD field to HW */
-	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
-	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi);
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
-	WREG32(mmCP_HQD_IB_BASE_ADDR, mqd->cp_hqd_ib_base_addr_lo);
-	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, mqd->cp_hqd_ib_base_addr_hi);
-	WREG32(mmCP_HQD_IB_RPTR, mqd->cp_hqd_ib_rptr);
-	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_SEMA_CMD, mqd->cp_hqd_sema_cmd);
-	WREG32(mmCP_HQD_MSG_TYPE, mqd->cp_hqd_msg_type);
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, mqd->cp_hqd_atomic0_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, mqd->cp_hqd_atomic0_preop_hi);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, mqd->cp_hqd_atomic1_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, mqd->cp_hqd_atomic1_preop_hi);
-	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
-	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
-	WREG32(mmCP_HQD_IQ_RPTR, mqd->cp_hqd_iq_rptr);
+	/* program all HQD registers */
+	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
 	/* activate the HQD */
-	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
+	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
 	return 0;
 }
 
 static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
 {
 	int r;
 	u64 mqd_gpu_addr;
 	struct cik_mqd *mqd;
 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
 
 	if (ring->mqd_obj == NULL) {
 		r = amdgpu_bo_create(adev,
 				sizeof(struct cik_mqd),
 				PAGE_SIZE, true,
 				AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				&ring->mqd_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 11e6d47c..89626bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5024,116 +5024,56 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
 	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
 	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
 	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
 	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
 	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
 	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
 	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
 	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
 	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
 
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 
 	return 0;
 }
 
 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
 			struct vi_mqd *mqd)
 {
-	/* disable wptr polling */
-	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
-
-	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
-
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
-
-	/* enable doorbell? */
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
+	uint32_t mqd_reg;
+	uint32_t *mqd_data;
 
-	/* set pq read/write pointers */
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
+	mqd_data = &mqd->cp_mqd_base_addr_lo;
 
-	/* set the pointer to the MQD */
-	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
-	/* set MQD vmid to 0 */
-	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
-
-	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
-	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
-
-	/* set the wb address whether it's enabled or not */
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-				mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-				mqd->cp_hqd_pq_rptr_report_addr_hi);
-
-	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
-	/* enable the doorbell if requested */
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
-
-	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-	WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
-	WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
-
-	/* set the HQD priority */
-	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
-	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
-
-	/* set cwsr save area */
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, mqd->cp_hqd_ctx_save_base_addr_lo);
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, mqd->cp_hqd_ctx_save_base_addr_hi);
-	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, mqd->cp_hqd_ctx_save_control);
-	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, mqd->cp_hqd_cntl_stack_offset);
-	WREG32(mmCP_HQD_CNTL_STACK_SIZE, mqd->cp_hqd_cntl_stack_size);
-	WREG32(mmCP_HQD_WG_STATE_OFFSET, mqd->cp_hqd_wg_state_offset);
-	WREG32(mmCP_HQD_CTX_SAVE_SIZE, mqd->cp_hqd_ctx_save_size);
-
-	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
-	WREG32(mmCP_HQD_EOP_EVENTS, mqd->cp_hqd_eop_done_events);
-	WREG32(mmCP_HQD_ERROR, mqd->cp_hqd_error);
-	WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
-	WREG32(mmCP_HQD_EOP_DONES, mqd->cp_hqd_eop_dones);
-
-	/* set the vmid for the queue */
-	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
+	/* disable wptr polling */
+	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
-	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+	/* program all HQD registers */
+	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
-	/* activate the queue */
-	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
+	/* activate the HQD */
+	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
 	return 0;
 }
 
 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
 	int r = 0;
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
 	gfx_v8_0_kiq_setting(ring);
 
 	if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
 
 		/* reset ring buffer */
 		ring->wptr = 0;
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/6] drm/amdgpu: workaround tonga HW bug in HQD programming sequence
       [not found] ` <20170421234836.18620-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-04-21 23:48   ` [PATCH 1/6] drm/amdgpu: condense mqd programming sequence Andres Rodriguez
@ 2017-04-21 23:48   ` Andres Rodriguez
  2017-04-21 23:48   ` [PATCH 3/6] drm/amdgpu: untie user ring ids from kernel ring ids v4 Andres Rodriguez
                     ` (4 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Andres Rodriguez @ 2017-04-21 23:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Jay Cornwall, andresx7-Re5JQEeQqe8AvxtiuMwx3w

Tonga based asics may experience hangs when an HQD's EOP parameters
are modified.

Workaround this HW issue by avoiding writes to these registers for
tonga asics.

Based on the following ROCm commit:
2a0fb8 - drm/amdgpu: Synchronize KFD HQD load protocol with CP scheduler

From the ROCm git repository:
https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver.git

CC: Jay Cornwall <Jay.Cornwall@amd.com>
Suggested-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 89626bc..e01109e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5034,41 +5034,55 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 
 	return 0;
 }
 
 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
 			struct vi_mqd *mqd)
 {
 	uint32_t mqd_reg;
 	uint32_t *mqd_data;
 
 	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
 	mqd_data = &mqd->cp_mqd_base_addr_lo;
 
 	/* disable wptr polling */
 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
 	/* program all HQD registers */
-	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
+	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+	 * This is safe since EOP RPTR==WPTR for any inactive HQD
+	 * on ASICs that do not support context-save.
+	 * EOP writes/reads can start anywhere in the ring.
+	 */
+	if (adev->asic_type != CHIP_TONGA) {
+		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
+	}
+
+	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
 	/* activate the HQD */
 	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
 		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
 
 	return 0;
 }
 
 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
 	int r = 0;
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
 	gfx_v8_0_kiq_setting(ring);
 
 	if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/6] drm/amdgpu: untie user ring ids from kernel ring ids v4
       [not found] ` <20170421234836.18620-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-04-21 23:48   ` [PATCH 1/6] drm/amdgpu: condense mqd programming sequence Andres Rodriguez
  2017-04-21 23:48   ` [PATCH 2/6] drm/amdgpu: workaround tonga HW bug in HQD " Andres Rodriguez
@ 2017-04-21 23:48   ` Andres Rodriguez
  2017-04-21 23:48   ` [PATCH 4/6] drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4 Andres Rodriguez
                     ` (3 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Andres Rodriguez @ 2017-04-21 23:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Add amdgpu_queue_mgr, a mechanism that allows disjointing usermode's
ring ids from the kernel's ring ids.

The queue manager maintains a per-file descriptor map of user ring ids
to amdgpu_ring pointers. Once a map is created it is permanent (this is
required to maintain FIFO execution guarantees for a context's ring).

Different queue map policies can be configured for each HW IP.
Currently all HW IPs use the identity mapper, i.e. kernel ring id is
equal to the user ring id.

The purpose of this mechanism is to distribute the load across multiple
queues more effectively for HW IPs that support multiple rings.
Userspace clients are unable to check whether a specific resource is in
use by a different client. Therefore, it is up to the kernel driver to
make the optimal choice.

v2: remove amdgpu_queue_mapper_funcs
v3: made amdgpu_queue_mgr per context instead of per-fd
v4: add context_put on error paths

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile           |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  27 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 117 +++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       |   6 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 230 ++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c      |  45 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h      |   2 +
 7 files changed, 335 insertions(+), 95 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 660786a..dd48eb2 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -7,41 +7,42 @@ FULL_AMD_PATH=$(src)/..
 ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
 	-I$(FULL_AMD_PATH)/include \
 	-I$(FULL_AMD_PATH)/amdgpu \
 	-I$(FULL_AMD_PATH)/scheduler \
 	-I$(FULL_AMD_PATH)/powerplay/inc \
 	-I$(FULL_AMD_PATH)/acp/include
 
 amdgpu-y := amdgpu_drv.o
 
 # add KMS driver
 amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \
 	atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
 	amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
 	amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
 	amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
+	amdgpu_queue_mgr.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
 	ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \
 	amdgpu_amdkfd_gfx_v7.o
 
 amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
 
 amdgpu-y += \
 	vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o
 
 # add GMC block
 amdgpu-y += \
 	gmc_v7_0.o \
 	gmc_v8_0.o \
 	gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o
 
 # add IH block
 amdgpu-y += \
 	amdgpu_irq.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a02b08a..7eb6c9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -756,52 +756,76 @@ struct amdgpu_ib {
 	uint32_t			length_dw;
 	uint64_t			gpu_addr;
 	uint32_t			*ptr;
 	uint32_t			flags;
 };
 
 extern const struct amd_sched_backend_ops amdgpu_sched_ops;
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 		     struct amdgpu_job **job, struct amdgpu_vm *vm);
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 			     struct amdgpu_job **job);
 
 void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct dma_fence **f);
 
 /*
+ * Queue manager
+ */
+struct amdgpu_queue_mapper {
+	int 		hw_ip;
+	struct mutex	lock;
+	/* protected by lock */
+	struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS];
+};
+
+struct amdgpu_queue_mgr {
+	struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM];
+};
+
+int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr);
+int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
+			 struct amdgpu_queue_mgr *mgr,
+			 int hw_ip, int instance, int ring,
+			 struct amdgpu_ring **out_ring);
+
+/*
  * context related structures
  */
 
 struct amdgpu_ctx_ring {
 	uint64_t		sequence;
 	struct dma_fence	**fences;
 	struct amd_sched_entity	entity;
 };
 
 struct amdgpu_ctx {
 	struct kref		refcount;
 	struct amdgpu_device    *adev;
+	struct amdgpu_queue_mgr queue_mgr;
 	unsigned		reset_counter;
 	spinlock_t		ring_lock;
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 	bool preamble_presented;
 };
 
 struct amdgpu_ctx_mgr {
 	struct amdgpu_device	*adev;
 	struct mutex		lock;
 	/* protected by lock */
 	struct idr		ctx_handles;
 };
 
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
 int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
 
 uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 			      struct dma_fence *fence);
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
@@ -1865,43 +1889,40 @@ static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
 #define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos))
 #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
 #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
 #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s))
 #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s))
 #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b))
 #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
 #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
 #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
 #define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a))
 #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
 
 /* Common functions */
 int amdgpu_gpu_reset(struct amdgpu_device *adev);
 bool amdgpu_need_backup(struct amdgpu_device *adev);
 void amdgpu_pci_config_reset(struct amdgpu_device *adev);
 bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring);
 void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
 bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
 void amdgpu_vram_location(struct amdgpu_device *adev, struct amdgpu_mc *mc, u64 base);
 void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc);
 void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ec71b93..3845965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -13,112 +13,40 @@
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  *
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
 #include <linux/pagemap.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
-		       u32 ip_instance, u32 ring,
-		       struct amdgpu_ring **out_ring)
-{
-	/* Right now all IPs have only one instance - multiple rings. */
-	if (ip_instance != 0) {
-		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
-		return -EINVAL;
-	}
-
-	switch (ip_type) {
-	default:
-		DRM_ERROR("unknown ip type: %d\n", ip_type);
-		return -EINVAL;
-	case AMDGPU_HW_IP_GFX:
-		if (ring < adev->gfx.num_gfx_rings) {
-			*out_ring = &adev->gfx.gfx_ring[ring];
-		} else {
-			DRM_ERROR("only %d gfx rings are supported now\n",
-				  adev->gfx.num_gfx_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_COMPUTE:
-		if (ring < adev->gfx.num_compute_rings) {
-			*out_ring = &adev->gfx.compute_ring[ring];
-		} else {
-			DRM_ERROR("only %d compute rings are supported now\n",
-				  adev->gfx.num_compute_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_DMA:
-		if (ring < adev->sdma.num_instances) {
-			*out_ring = &adev->sdma.instance[ring].ring;
-		} else {
-			DRM_ERROR("only %d SDMA rings are supported\n",
-				  adev->sdma.num_instances);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD:
-		*out_ring = &adev->uvd.ring;
-		break;
-	case AMDGPU_HW_IP_VCE:
-		if (ring < adev->vce.num_rings){
-			*out_ring = &adev->vce.ring[ring];
-		} else {
-			DRM_ERROR("only %d VCE rings are supported\n", adev->vce.num_rings);
-			return -EINVAL;
-		}
-		break;
-	case AMDGPU_HW_IP_UVD_ENC:
-		if (ring < adev->uvd.num_enc_rings){
-			*out_ring = &adev->uvd.ring_enc[ring];
-		} else {
-			DRM_ERROR("only %d UVD ENC rings are supported\n",
-				adev->uvd.num_enc_rings);
-			return -EINVAL;
-		}
-		break;
-	}
-
-	if (!(*out_ring && (*out_ring)->adev)) {
-		DRM_ERROR("Ring %d is not initialized on IP %d\n",
-			  ring, ip_type);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 				      struct drm_amdgpu_cs_chunk_fence *data,
 				      uint32_t *offset)
 {
 	struct drm_gem_object *gobj;
 	unsigned long size;
 
 	gobj = drm_gem_object_lookup(p->filp, data->handle);
 	if (gobj == NULL)
 		return -EINVAL;
 
 	p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
 	p->uf_entry.tv.shared = true;
 	p->uf_entry.user_pages = NULL;
 
 	size = amdgpu_bo_size(p->uf_entry.robj);
 	if (size != PAGE_SIZE || (data->offset + 8) > size)
 		return -EINVAL;
@@ -899,43 +827,42 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
 		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 			continue;
 
 		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
 			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
 				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
 					ce_preempt++;
 				else
 					de_preempt++;
 			}
 
 			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
 			if (ce_preempt > 1 || de_preempt > 1)
 				return -EINVAL;
 		}
 
-		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
-				       chunk_ib->ip_instance, chunk_ib->ring,
-				       &ring);
+		r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type,
+					 chunk_ib->ip_instance, chunk_ib->ring, &ring);
 		if (r)
 			return r;
 
 		if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
 			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
 			if (!parser->ctx->preamble_presented) {
 				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
 				parser->ctx->preamble_presented = true;
 			}
 		}
 
 		if (parser->job->ring && parser->job->ring != ring)
 			return -EINVAL;
 
 		parser->job->ring = ring;
 
 		if (ring->funcs->parse_cs) {
 			struct amdgpu_bo_va_mapping *m;
 			struct amdgpu_bo *aobj = NULL;
 			uint64_t offset;
@@ -1003,50 +930,53 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 
 	for (i = 0; i < p->nchunks; ++i) {
 		struct drm_amdgpu_cs_chunk_dep *deps;
 		struct amdgpu_cs_chunk *chunk;
 		unsigned num_deps;
 
 		chunk = &p->chunks[i];
 
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
 			continue;
 
 		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
 		num_deps = chunk->length_dw * 4 /
 			sizeof(struct drm_amdgpu_cs_chunk_dep);
 
 		for (j = 0; j < num_deps; ++j) {
 			struct amdgpu_ring *ring;
 			struct amdgpu_ctx *ctx;
 			struct dma_fence *fence;
 
-			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-					       deps[j].ip_instance,
-					       deps[j].ring, &ring);
-			if (r)
-				return r;
-
 			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
 			if (ctx == NULL)
 				return -EINVAL;
 
+			r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
+						 deps[j].ip_type,
+						 deps[j].ip_instance,
+						 deps[j].ring, &ring);
+			if (r) {
+				amdgpu_ctx_put(ctx);
+				return r;
+			}
+
 			fence = amdgpu_ctx_get_fence(ctx, ring,
 						     deps[j].handle);
 			if (IS_ERR(fence)) {
 				r = PTR_ERR(fence);
 				amdgpu_ctx_put(ctx);
 				return r;
 
 			} else if (fence) {
 				r = amdgpu_sync_fence(adev, &p->job->sync,
 						      fence);
 				dma_fence_put(fence);
 				amdgpu_ctx_put(ctx);
 				if (r)
 					return r;
 			}
 		}
 	}
 
 	return 0;
 }
@@ -1138,93 +1068,98 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 /**
  * amdgpu_cs_wait_ioctl - wait for a command submission to finish
  *
  * @dev: drm device
  * @data: data from userspace
  * @filp: file private
  *
  * Wait for the command submission identified by handle to finish.
  */
 int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *filp)
 {
 	union drm_amdgpu_wait_cs *wait = data;
 	struct amdgpu_device *adev = dev->dev_private;
 	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
 	struct amdgpu_ring *ring = NULL;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	long r;
 
-	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
-			       wait->in.ring, &ring);
-	if (r)
-		return r;
-
 	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
 	if (ctx == NULL)
 		return -EINVAL;
 
+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
+				 wait->in.ip_type, wait->in.ip_instance,
+				 wait->in.ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return r;
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
 	if (IS_ERR(fence))
 		r = PTR_ERR(fence);
 	else if (fence) {
 		r = dma_fence_wait_timeout(fence, true, timeout);
 		dma_fence_put(fence);
 	} else
 		r = 1;
 
 	amdgpu_ctx_put(ctx);
 	if (r < 0)
 		return r;
 
 	memset(wait, 0, sizeof(*wait));
 	wait->out.status = (r == 0);
 
 	return 0;
 }
 
 /**
  * amdgpu_cs_get_fence - helper to get fence from drm_amdgpu_fence
  *
  * @adev: amdgpu device
  * @filp: file private
  * @user: drm_amdgpu_fence copied from user space
  */
 static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev,
 					     struct drm_file *filp,
 					     struct drm_amdgpu_fence *user)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_ctx *ctx;
 	struct dma_fence *fence;
 	int r;
 
-	r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance,
-			       user->ring, &ring);
-	if (r)
-		return ERR_PTR(r);
-
 	ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id);
 	if (ctx == NULL)
 		return ERR_PTR(-EINVAL);
 
+	r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type,
+				 user->ip_instance, user->ring, &ring);
+	if (r) {
+		amdgpu_ctx_put(ctx);
+		return ERR_PTR(r);
+	}
+
 	fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no);
 	amdgpu_ctx_put(ctx);
 
 	return fence;
 }
 
 /**
  * amdgpu_cs_wait_all_fence - wait on all fences to signal
  *
  * @adev: amdgpu device
  * @filp: file private
  * @wait: wait parameters
  * @fences: array of drm_amdgpu_fence
  */
 static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
 				     struct drm_file *filp,
 				     union drm_amdgpu_wait_fences *wait,
 				     struct drm_amdgpu_fence *fences)
 {
 	uint32_t fence_count = wait->in.fence_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 90d1ac8..1969f27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -41,68 +41,74 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
 		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
 
 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
 
 	/* create context entity for each ring */
 	for (i = 0; i < adev->num_rings; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 		struct amd_sched_rq *rq;
 
 		rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
 		r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
 					  rq, amdgpu_sched_jobs);
 		if (r)
 			goto failed;
 	}
 
+	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
+	if (r)
+		goto failed;
+
 	return 0;
 
 failed:
 	for (j = 0; j < i; j++)
 		amd_sched_entity_fini(&adev->rings[j]->sched,
 				      &ctx->rings[j].entity);
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 	return r;
 }
 
 static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 {
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		for (j = 0; j < amdgpu_sched_jobs; ++j)
 			dma_fence_put(ctx->rings[i].fences[j]);
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 
 	for (i = 0; i < adev->num_rings; i++)
 		amd_sched_entity_fini(&adev->rings[i]->sched,
 				      &ctx->rings[i].entity);
+
+	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 			    struct amdgpu_fpriv *fpriv,
 			    uint32_t *id)
 {
 	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
 	struct amdgpu_ctx *ctx;
 	int r;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
 	mutex_lock(&mgr->lock);
 	r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
 	if (r < 0) {
 		mutex_unlock(&mgr->lock);
 		kfree(ctx);
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
new file mode 100644
index 0000000..3e9ac80
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2017 Valve Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andres Rodriguez
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ring.h"
+
+static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper,
+				    int hw_ip)
+{
+	if (!mapper)
+		return -EINVAL;
+
+	if (hw_ip > AMDGPU_MAX_IP_NUM)
+		return -EINVAL;
+
+	mapper->hw_ip = hw_ip;
+	mutex_init(&mapper->lock);
+
+	memset(mapper->queue_map, 0, sizeof(mapper->queue_map));
+
+	return 0;
+}
+
+static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper,
+					  int ring)
+{
+	return mapper->queue_map[ring];
+}
+
+static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper,
+			     int ring, struct amdgpu_ring *pring)
+{
+	if (WARN_ON(mapper->queue_map[ring])) {
+		DRM_ERROR("Un-expected ring re-map\n");
+		return -EINVAL;
+	}
+
+	mapper->queue_map[ring] = pring;
+
+	return 0;
+}
+
+static int amdgpu_identity_map(struct amdgpu_device *adev,
+			       struct amdgpu_queue_mapper *mapper,
+			       int ring,
+			       struct amdgpu_ring **out_ring)
+{
+	switch (mapper->hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		*out_ring = &adev->gfx.gfx_ring[ring];
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		*out_ring = &adev->gfx.compute_ring[ring];
+		break;
+	case AMDGPU_HW_IP_DMA:
+		*out_ring = &adev->sdma.instance[ring].ring;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		*out_ring = &adev->uvd.ring;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		*out_ring = &adev->vce.ring[ring];
+		break;
+	default:
+		*out_ring = NULL;
+		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
+		return -EINVAL;
+	}
+
+	return amdgpu_update_cached_map(mapper, ring, *out_ring);
+}
+
+/**
+ * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
+ *
+ * @adev: amdgpu_device pointer
+ * @mgr: amdgpu_queue_mgr structure holding queue information
+ *
+ * Initialize the the selected @mgr (all asics).
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr)
+{
+	int i, r;
+
+	if (!adev || !mgr)
+		return -EINVAL;
+
+	memset(mgr, 0, sizeof(*mgr));
+
+	for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) {
+		r = amdgpu_queue_mapper_init(&mgr->mapper[i], i);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct
+ *
+ * @adev: amdgpu_device pointer
+ * @mgr: amdgpu_queue_mgr structure holding queue information
+ *
+ * De-initialize the the selected @mgr (all asics).
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_queue_mgr_fini(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mgr *mgr)
+{
+	return 0;
+}
+
+/**
+ * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring
+ *
+ * @adev: amdgpu_device pointer
+ * @mgr: amdgpu_queue_mgr structure holding queue information
+ * @hw_ip: HW IP enum
+ * @instance: HW instance
+ * @ring: user ring id
+ * @our_ring: pointer to mapped amdgpu_ring
+ *
+ * Map a userspace ring id to an appropriate kernel ring. Different
+ * policies are configurable at a HW IP level.
+ *
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
+			 struct amdgpu_queue_mgr *mgr,
+			 int hw_ip, int instance, int ring,
+			 struct amdgpu_ring **out_ring)
+{
+	int r, ip_num_rings;
+	struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
+
+	if (!adev || !mgr || !out_ring)
+		return -EINVAL;
+
+	if (hw_ip >= AMDGPU_MAX_IP_NUM)
+		return -EINVAL;
+
+	if (ring >= AMDGPU_MAX_RINGS)
+		return -EINVAL;
+
+	/* Right now all IPs have only one instance - multiple rings. */
+	if (instance != 0) {
+		DRM_ERROR("invalid ip instance: %d\n", instance);
+		return -EINVAL;
+	}
+
+	switch (hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		ip_num_rings = adev->gfx.num_gfx_rings;
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		ip_num_rings = adev->gfx.num_compute_rings;
+		break;
+	case AMDGPU_HW_IP_DMA:
+		ip_num_rings = adev->sdma.num_instances;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		ip_num_rings = 1;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		ip_num_rings = adev->vce.num_rings;
+		break;
+	default:
+		DRM_ERROR("unknown ip type: %d\n", hw_ip);
+		return -EINVAL;
+	}
+
+	if (ring >= ip_num_rings) {
+		DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
+				ring, ip_num_rings, hw_ip);
+		return -EINVAL;
+	}
+
+	mutex_lock(&mapper->lock);
+
+	*out_ring = amdgpu_get_cached_map(mapper, ring);
+	if (*out_ring) {
+		/* cache hit */
+		r = 0;
+		goto out_unlock;
+	}
+
+	switch (mapper->hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+	case AMDGPU_HW_IP_COMPUTE:
+	case AMDGPU_HW_IP_DMA:
+	case AMDGPU_HW_IP_UVD:
+	case AMDGPU_HW_IP_VCE:
+		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
+		break;
+	default:
+		*out_ring = NULL;
+		r = -EINVAL;
+		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
+	}
+
+out_unlock:
+	mutex_unlock(&mapper->lock);
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6a85db0..12fc815 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -35,40 +35,85 @@
 #include "atom.h"
 
 /*
  * Rings
  * Most engines on the GPU are fed via ring buffers.  Ring
  * buffers are areas of GPU accessible memory that the host
  * writes commands into and the GPU reads commands out of.
  * There is a rptr (read pointer) that determines where the
  * GPU is currently reading, and a wptr (write pointer)
  * which determines where the host has written.  When the
  * pointers are equal, the ring is idle.  When the host
  * writes commands to the ring buffer, it increments the
  * wptr.  The GPU then starts fetching commands and executes
  * them until the pointers are equal again.
  */
 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring);
 static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring);
 
 /**
+ * amdgpu_ring_is_valid_index - check if a ring idex is valid for a HW IP
+ *
+ * @adev: amdgpu_device pointer
+ * @ip_type: The HW IP to check against
+ * @ring: the ring index
+ *
+ * Check if @ring is a valid index for @ip_type (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_is_valid_index(struct amdgpu_device *adev,
+			       int ip_type, int ring)
+{
+	int ip_num_rings;
+
+	switch (ip_type) {
+	case AMDGPU_HW_IP_GFX:
+		ip_num_rings = adev->gfx.num_gfx_rings;
+		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		ip_num_rings = adev->gfx.num_compute_rings;
+		break;
+	case AMDGPU_HW_IP_DMA:
+		ip_num_rings = adev->sdma.num_instances;
+		break;
+	case AMDGPU_HW_IP_UVD:
+		ip_num_rings = 1;
+		break;
+	case AMDGPU_HW_IP_VCE:
+		ip_num_rings = adev->vce.num_rings;
+		break;
+	default:
+		DRM_ERROR("unknown ip type: %d\n", ip_type);
+		return -EINVAL;
+	}
+
+	if (ring >= ip_num_rings) {
+		DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
+				ring, ip_num_rings, ip_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
  * amdgpu_ring_alloc - allocate space on the ring buffer
  *
  * @adev: amdgpu_device pointer
  * @ring: amdgpu_ring structure holding ring information
  * @ndw: number of dwords to allocate in the ring buffer
  *
  * Allocate @ndw dwords in the ring buffer (all asics).
  * Returns 0 on success, error on failure.
  */
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
 {
 	/* Align requested size with padding so unlock_commit can
 	 * pad safely */
 	ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
 
 	/* Make sure we aren't trying to allocate more space
 	 * than the maximum for one submission
 	 */
 	if (WARN_ON_ONCE(ndw > ring->max_dw))
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 944443c..4de0d83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -168,38 +168,40 @@ struct amdgpu_ring {
 	u32			queue;
 	struct amdgpu_bo	*mqd_obj;
 	uint64_t                mqd_gpu_addr;
 	void                    *mqd_ptr;
 	uint64_t                eop_gpu_addr;
 	u32			doorbell_index;
 	bool			use_doorbell;
 	unsigned		wptr_offs;
 	unsigned		fence_offs;
 	uint64_t		current_ctx;
 	char			name[16];
 	unsigned		cond_exe_offs;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
 	unsigned		vm_inv_eng;
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 #endif
 };
 
+int amdgpu_ring_is_valid_index(struct amdgpu_device *adev,
+			       int hw_ip, int ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 	int i = 0;
 	while (i <= ring->buf_mask)
 		ring->ring[i++] = ring->funcs->nop;
 
 }
 
 #endif
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/6] drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4
       [not found] ` <20170421234836.18620-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-04-21 23:48   ` [PATCH 3/6] drm/amdgpu: untie user ring ids from kernel ring ids v4 Andres Rodriguez
@ 2017-04-21 23:48   ` Andres Rodriguez
  2017-04-21 23:48   ` [PATCH 5/6] drm/amdgpu: guarantee bijective mapping of ring ids for LRU v3 Andres Rodriguez
                     ` (2 subsequent siblings)
  6 siblings, 0 replies; 12+ messages in thread
From: Andres Rodriguez @ 2017-04-21 23:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Use an LRU policy to map usermode rings to HW compute queues.

Most compute clients use one queue, and usually the first queue
available. This results in poor pipe/queue work distribution when
multiple compute apps are running. In most cases pipe 0 queue 0 is
the only queue that gets used.

In order to better distribute work across multiple HW queues, we adopt
a policy to map the usermode ring ids to the LRU HW queue.

This fixes a large majority of multi-app compute workloads sharing the
same HW queue, even though 7 other queues are available.

v2: use ring->funcs->type instead of ring->hw_ip
v3: remove amdgpu_queue_mapper_funcs
v4: change ring_lru_list_lock to spinlock, grab only once in lru_get()

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  3 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 38 +++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c      | 63 +++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h      |  4 ++
 5 files changed, 110 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 7eb6c9c..f7eac93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1616,40 +1616,43 @@ struct amdgpu_device {
 	int				num_ip_blocks;
 	struct mutex	mn_lock;
 	DECLARE_HASHTABLE(mn_hash, 7);
 
 	/* tracking pinned memory */
 	u64 vram_pin_size;
 	u64 invisible_pin_size;
 	u64 gart_pin_size;
 
 	/* amdkfd interface */
 	struct kfd_dev          *kfd;
 
 	struct amdgpu_virt	virt;
 
 	/* link all shadow bo */
 	struct list_head                shadow_list;
 	struct mutex                    shadow_list_lock;
 	/* link all gtt */
 	spinlock_t			gtt_list_lock;
 	struct list_head                gtt_list;
+	/* keep an lru list of rings by HW IP */
+	struct list_head		ring_lru_list;
+	spinlock_t			ring_lru_list_lock;
 
 	/* record hw reset is performed */
 	bool has_hw_reset;
 
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
 {
 	return container_of(bdev, struct amdgpu_device, mman.bdev);
 }
 
 bool amdgpu_device_is_px(struct drm_device *dev);
 int amdgpu_device_init(struct amdgpu_device *adev,
 		       struct drm_device *ddev,
 		       struct pci_dev *pdev,
 		       uint32_t flags);
 void amdgpu_device_fini(struct amdgpu_device *adev);
 int amdgpu_gpu_wait_for_idle(struct amdgpu_device *adev);
 
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8aad6f4..f1f03b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1864,40 +1864,43 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	amdgpu_check_arguments(adev);
 
 	/* Registers mapping */
 	/* TODO: block userspace mapping of io register */
 	spin_lock_init(&adev->mmio_idx_lock);
 	spin_lock_init(&adev->smc_idx_lock);
 	spin_lock_init(&adev->pcie_idx_lock);
 	spin_lock_init(&adev->uvd_ctx_idx_lock);
 	spin_lock_init(&adev->didt_idx_lock);
 	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);
 	spin_lock_init(&adev->mm_stats.lock);
 
 	INIT_LIST_HEAD(&adev->shadow_list);
 	mutex_init(&adev->shadow_list_lock);
 
 	INIT_LIST_HEAD(&adev->gtt_list);
 	spin_lock_init(&adev->gtt_list_lock);
 
+	INIT_LIST_HEAD(&adev->ring_lru_list);
+	spin_lock_init(&adev->ring_lru_list_lock);
+
 	if (adev->asic_type >= CHIP_BONAIRE) {
 		adev->rmmio_base = pci_resource_start(adev->pdev, 5);
 		adev->rmmio_size = pci_resource_len(adev->pdev, 5);
 	} else {
 		adev->rmmio_base = pci_resource_start(adev->pdev, 2);
 		adev->rmmio_size = pci_resource_len(adev->pdev, 2);
 	}
 
 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
 	if (adev->rmmio == NULL) {
 		return -ENOMEM;
 	}
 	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
 	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
 
 	if (adev->asic_type >= CHIP_BONAIRE)
 		/* doorbell bar mapping */
 		amdgpu_doorbell_init(adev);
 
 	/* io port mapping */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 3e9ac80..054d750 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -74,40 +74,74 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
 		*out_ring = &adev->gfx.compute_ring[ring];
 		break;
 	case AMDGPU_HW_IP_DMA:
 		*out_ring = &adev->sdma.instance[ring].ring;
 		break;
 	case AMDGPU_HW_IP_UVD:
 		*out_ring = &adev->uvd.ring;
 		break;
 	case AMDGPU_HW_IP_VCE:
 		*out_ring = &adev->vce.ring[ring];
 		break;
 	default:
 		*out_ring = NULL;
 		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
 		return -EINVAL;
 	}
 
 	return amdgpu_update_cached_map(mapper, ring, *out_ring);
 }
 
+static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
+{
+	switch (hw_ip) {
+	case AMDGPU_HW_IP_GFX:
+		return AMDGPU_RING_TYPE_GFX;
+	case AMDGPU_HW_IP_COMPUTE:
+		return AMDGPU_RING_TYPE_COMPUTE;
+	case AMDGPU_HW_IP_DMA:
+		return AMDGPU_RING_TYPE_SDMA;
+	case AMDGPU_HW_IP_UVD:
+		return AMDGPU_RING_TYPE_UVD;
+	case AMDGPU_HW_IP_VCE:
+		return AMDGPU_RING_TYPE_VCE;
+	default:
+		DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
+		return -1;
+	}
+}
+
+static int amdgpu_lru_map(struct amdgpu_device *adev,
+			  struct amdgpu_queue_mapper *mapper,
+			  int user_ring,
+			  struct amdgpu_ring **out_ring)
+{
+	int r;
+	int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
+
+	r = amdgpu_ring_lru_get(adev, ring_type, out_ring);
+	if (r)
+		return r;
+
+	return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
+}
+
 /**
  * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
  *
  * @adev: amdgpu_device pointer
  * @mgr: amdgpu_queue_mgr structure holding queue information
  *
  * Initialize the the selected @mgr (all asics).
  *
  * Returns 0 on success, error on failure.
  */
 int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
 			  struct amdgpu_queue_mgr *mgr)
 {
 	int i, r;
 
 	if (!adev || !mgr)
 		return -EINVAL;
 
 	memset(mgr, 0, sizeof(*mgr));
 
@@ -195,36 +229,38 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
 	if (ring >= ip_num_rings) {
 		DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
 				ring, ip_num_rings, hw_ip);
 		return -EINVAL;
 	}
 
 	mutex_lock(&mapper->lock);
 
 	*out_ring = amdgpu_get_cached_map(mapper, ring);
 	if (*out_ring) {
 		/* cache hit */
 		r = 0;
 		goto out_unlock;
 	}
 
 	switch (mapper->hw_ip) {
 	case AMDGPU_HW_IP_GFX:
-	case AMDGPU_HW_IP_COMPUTE:
 	case AMDGPU_HW_IP_DMA:
 	case AMDGPU_HW_IP_UVD:
 	case AMDGPU_HW_IP_VCE:
 		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
 		break;
+	case AMDGPU_HW_IP_COMPUTE:
+		r = amdgpu_lru_map(adev, mapper, ring, out_ring);
+		break;
 	default:
 		*out_ring = NULL;
 		r = -EINVAL;
 		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
 	}
 
 out_unlock:
 	mutex_unlock(&mapper->lock);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 12fc815..2b452b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -163,40 +163,42 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
  * @ring: amdgpu_ring structure holding ring information
  *
  * Update the wptr (write pointer) to tell the GPU to
  * execute new commands on the ring buffer (all asics).
  */
 void amdgpu_ring_commit(struct amdgpu_ring *ring)
 {
 	uint32_t count;
 
 	/* We pad to match fetch size */
 	count = ring->funcs->align_mask + 1 -
 		(ring->wptr & ring->funcs->align_mask);
 	count %= ring->funcs->align_mask + 1;
 	ring->funcs->insert_nop(ring, count);
 
 	mb();
 	amdgpu_ring_set_wptr(ring);
 
 	if (ring->funcs->end_use)
 		ring->funcs->end_use(ring);
+
+	amdgpu_ring_lru_touch(ring->adev, ring);
 }
 
 /**
  * amdgpu_ring_undo - reset the wptr
  *
  * @ring: amdgpu_ring structure holding ring information
  *
  * Reset the driver's copy of the wptr (all asics).
  */
 void amdgpu_ring_undo(struct amdgpu_ring *ring)
 {
 	ring->wptr = ring->wptr_old;
 
 	if (ring->funcs->end_use)
 		ring->funcs->end_use(ring);
 }
 
 /**
  * amdgpu_ring_init - init driver ring struct.
  *
@@ -281,40 +283,42 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 					     amdgpu_sched_hw_submission);
 
 	ring->buf_mask = (ring->ring_size / 4) - 1;
 	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
 		0xffffffffffffffff : ring->buf_mask;
 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
 		r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
 					    AMDGPU_GEM_DOMAIN_GTT,
 					    &ring->ring_obj,
 					    &ring->gpu_addr,
 					    (void **)&ring->ring);
 		if (r) {
 			dev_err(adev->dev, "(%d) ring create failed\n", r);
 			return r;
 		}
 		amdgpu_ring_clear_ring(ring);
 	}
 
 	ring->max_dw = max_dw;
+	INIT_LIST_HEAD(&ring->lru_list);
+	amdgpu_ring_lru_touch(adev, ring);
 
 	if (amdgpu_debugfs_ring_init(adev, ring)) {
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
 	}
 	return 0;
 }
 
 /**
  * amdgpu_ring_fini - tear down the driver ring struct.
  *
  * @adev: amdgpu_device pointer
  * @ring: amdgpu_ring structure holding ring information
  *
  * Tear down the driver information for the selected ring (all asics).
  */
 void amdgpu_ring_fini(struct amdgpu_ring *ring)
 {
 	ring->ready = false;
 
 	if (ring->funcs->support_64bit_ptrs) {
@@ -322,40 +326,99 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 		amdgpu_wb_free_64bit(ring->adev, ring->fence_offs);
 		amdgpu_wb_free_64bit(ring->adev, ring->rptr_offs);
 		amdgpu_wb_free_64bit(ring->adev, ring->wptr_offs);
 	} else {
 		amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
 		amdgpu_wb_free(ring->adev, ring->fence_offs);
 		amdgpu_wb_free(ring->adev, ring->rptr_offs);
 		amdgpu_wb_free(ring->adev, ring->wptr_offs);
 	}
 
 
 	amdgpu_bo_free_kernel(&ring->ring_obj,
 			      &ring->gpu_addr,
 			      (void **)&ring->ring);
 
 	amdgpu_debugfs_ring_fini(ring);
 
 	ring->adev->rings[ring->idx] = NULL;
 }
 
+static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
+					 struct amdgpu_ring *ring)
+{
+	/* list_move_tail handles the case where ring isn't part of the list */
+	list_move_tail(&ring->lru_list, &adev->ring_lru_list);
+}
+
+/**
+ * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
+ *
+ * @adev: amdgpu_device pointer
+ * @type: amdgpu_ring_type enum
+ * @ring: output ring
+ *
+ * Retrieve the amdgpu_ring structure for the least recently used ring of
+ * a specific IP block (all asics).
+ * Returns 0 on success, error on failure.
+ */
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
+			struct amdgpu_ring **ring)
+{
+	struct amdgpu_ring *entry;
+
+	/* List is sorted in LRU order, find first entry corresponding
+	 * to the desired HW IP */
+	*ring = NULL;
+	spin_lock(&adev->ring_lru_list_lock);
+	list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
+		if (entry->funcs->type == type) {
+			*ring = entry;
+			amdgpu_ring_lru_touch_locked(adev, *ring);
+			break;
+		}
+	}
+	spin_unlock(&adev->ring_lru_list_lock);
+
+	if (!*ring) {
+		DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * amdgpu_ring_lru_touch - mark a ring as recently being used
+ *
+ * @adev: amdgpu_device pointer
+ * @ring: ring to touch
+ *
+ * Move @ring to the tail of the lru list
+ */
+void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+	spin_lock(&adev->ring_lru_list_lock);
+	amdgpu_ring_lru_touch_locked(adev, ring);
+	spin_unlock(&adev->ring_lru_list_lock);
+}
+
 /*
  * Debugfs info
  */
 #if defined(CONFIG_DEBUG_FS)
 
 /* Layout of file is 12 bytes consisting of
  * - rptr
  * - wptr
  * - driver's copy of wptr
  *
  * followed by n-words of ring data
  */
 static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
 					size_t size, loff_t *pos)
 {
 	struct amdgpu_ring *ring = file_inode(f)->i_private;
 	int r, i;
 	uint32_t value, result, early[3];
 
 	if (*pos & 3 || size & 3)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4de0d83..3967f7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -132,40 +132,41 @@ struct amdgpu_ring_funcs {
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 	void (*insert_end)(struct amdgpu_ring *ring);
 	/* pad the indirect buffer to the necessary number of dw */
 	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
 	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
 	/* note usage for clock and power gating */
 	void (*begin_use)(struct amdgpu_ring *ring);
 	void (*end_use)(struct amdgpu_ring *ring);
 	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
 	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 	void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
 	void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
 };
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
 	const struct amdgpu_ring_funcs	*funcs;
 	struct amdgpu_fence_driver	fence_drv;
 	struct amd_gpu_scheduler	sched;
+	struct list_head		lru_list;
 
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr_offs;
 	u64			wptr;
 	u64			wptr_old;
 	unsigned		ring_size;
 	unsigned		max_dw;
 	int			count_dw;
 	uint64_t		gpu_addr;
 	uint64_t		ptr_mask;
 	uint32_t		buf_mask;
 	bool			ready;
 	u32			idx;
 	u32			me;
 	u32			pipe;
 	u32			queue;
 	struct amdgpu_bo	*mqd_obj;
 	uint64_t                mqd_gpu_addr;
 	void                    *mqd_ptr;
@@ -179,29 +180,32 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
 	unsigned		vm_inv_eng;
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 #endif
 };
 
 int amdgpu_ring_is_valid_index(struct amdgpu_device *adev,
 			       int hw_ip, int ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int hw_ip,
+			struct amdgpu_ring **ring);
+void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 	int i = 0;
 	while (i <= ring->buf_mask)
 		ring->ring[i++] = ring->funcs->nop;
 
 }
 
 #endif
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 5/6] drm/amdgpu: guarantee bijective mapping of ring ids for LRU v3
       [not found] ` <20170421234836.18620-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-04-21 23:48   ` [PATCH 4/6] drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4 Andres Rodriguez
@ 2017-04-21 23:48   ` Andres Rodriguez
  2017-04-21 23:48   ` [PATCH 6/6] drm/amdgpu: use LRU mapping policy for SDMA engines Andres Rodriguez
  2017-04-24  7:40   ` [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2 Christian König
  6 siblings, 0 replies; 12+ messages in thread
From: Andres Rodriguez @ 2017-04-21 23:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Depending on usage patterns, the current LRU policy may create a
non-injective mapping between userspace ring ids and kernel rings.

This behaviour is undesired as apps that attempt to fill all HW blocks
would be unable to reach some of them.

This change forces the LRU policy to create bijective mappings only.

v2: compress ring_blacklist
v3: simplify amdgpu_ring_is_blacklisted() logic

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 16 +++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c      | 33 +++++++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h      |  4 ++--
 3 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 054d750..5a7c691 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -98,44 +98,56 @@ static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip)
 		return AMDGPU_RING_TYPE_GFX;
 	case AMDGPU_HW_IP_COMPUTE:
 		return AMDGPU_RING_TYPE_COMPUTE;
 	case AMDGPU_HW_IP_DMA:
 		return AMDGPU_RING_TYPE_SDMA;
 	case AMDGPU_HW_IP_UVD:
 		return AMDGPU_RING_TYPE_UVD;
 	case AMDGPU_HW_IP_VCE:
 		return AMDGPU_RING_TYPE_VCE;
 	default:
 		DRM_ERROR("Invalid HW IP specified %d\n", hw_ip);
 		return -1;
 	}
 }
 
 static int amdgpu_lru_map(struct amdgpu_device *adev,
 			  struct amdgpu_queue_mapper *mapper,
 			  int user_ring,
 			  struct amdgpu_ring **out_ring)
 {
-	int r;
+	int r, i, j;
 	int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip);
+	int ring_blacklist[AMDGPU_MAX_RINGS];
+	struct amdgpu_ring *ring;
 
-	r = amdgpu_ring_lru_get(adev, ring_type, out_ring);
+	/* 0 is a valid ring index, so initialize to -1 */
+	memset(ring_blacklist, 0xff, sizeof(ring_blacklist));
+
+	for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) {
+		ring = mapper->queue_map[i];
+		if (ring)
+			ring_blacklist[j++] = ring->idx;
+	}
+
+	r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist,
+				j, out_ring);
 	if (r)
 		return r;
 
 	return amdgpu_update_cached_map(mapper, user_ring, *out_ring);
 }
 
 /**
  * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct
  *
  * @adev: amdgpu_device pointer
  * @mgr: amdgpu_queue_mgr structure holding queue information
  *
  * Initialize the the selected @mgr (all asics).
  *
  * Returns 0 on success, error on failure.
  */
 int amdgpu_queue_mgr_init(struct amdgpu_device *adev,
 			  struct amdgpu_queue_mgr *mgr)
 {
 	int i, r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 2b452b0..7486277 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -333,66 +333,85 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 		amdgpu_wb_free(ring->adev, ring->wptr_offs);
 	}
 
 
 	amdgpu_bo_free_kernel(&ring->ring_obj,
 			      &ring->gpu_addr,
 			      (void **)&ring->ring);
 
 	amdgpu_debugfs_ring_fini(ring);
 
 	ring->adev->rings[ring->idx] = NULL;
 }
 
 static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev,
 					 struct amdgpu_ring *ring)
 {
 	/* list_move_tail handles the case where ring isn't part of the list */
 	list_move_tail(&ring->lru_list, &adev->ring_lru_list);
 }
 
+static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring,
+				       int *blacklist, int num_blacklist)
+{
+	int i;
+
+	for (i = 0; i < num_blacklist; i++) {
+		if (ring->idx == blacklist[i])
+			return true;
+	}
+
+	return false;
+}
+
 /**
  * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block
  *
  * @adev: amdgpu_device pointer
  * @type: amdgpu_ring_type enum
+ * @blacklist: blacklisted ring ids array
+ * @num_blacklist: number of entries in @blacklist
  * @ring: output ring
  *
  * Retrieve the amdgpu_ring structure for the least recently used ring of
  * a specific IP block (all asics).
  * Returns 0 on success, error on failure.
  */
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type,
-			struct amdgpu_ring **ring)
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
+			int num_blacklist, struct amdgpu_ring **ring)
 {
 	struct amdgpu_ring *entry;
 
 	/* List is sorted in LRU order, find first entry corresponding
 	 * to the desired HW IP */
 	*ring = NULL;
 	spin_lock(&adev->ring_lru_list_lock);
 	list_for_each_entry(entry, &adev->ring_lru_list, lru_list) {
-		if (entry->funcs->type == type) {
-			*ring = entry;
-			amdgpu_ring_lru_touch_locked(adev, *ring);
-			break;
-		}
+		if (entry->funcs->type != type)
+			continue;
+
+		if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist))
+			continue;
+
+		*ring = entry;
+		amdgpu_ring_lru_touch_locked(adev, *ring);
+		break;
 	}
 	spin_unlock(&adev->ring_lru_list_lock);
 
 	if (!*ring) {
 		DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type);
 		return -EINVAL;
 	}
 
 	return 0;
 }
 
 /**
  * amdgpu_ring_lru_touch - mark a ring as recently being used
  *
  * @adev: amdgpu_device pointer
  * @ring: ring to touch
  *
  * Move @ring to the tail of the lru list
  */
 void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 3967f7b..10d6692 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -180,32 +180,32 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
 	unsigned		vm_inv_eng;
 #if defined(CONFIG_DEBUG_FS)
 	struct dentry *ent;
 #endif
 };
 
 int amdgpu_ring_is_valid_index(struct amdgpu_device *adev,
 			       int hw_ip, int ring);
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, struct amdgpu_irq_src *irq_src,
 		     unsigned irq_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
-int amdgpu_ring_lru_get(struct amdgpu_device *adev, int hw_ip,
-			struct amdgpu_ring **ring);
+int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist,
+			int num_blacklist, struct amdgpu_ring **ring);
 void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring);
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 	int i = 0;
 	while (i <= ring->buf_mask)
 		ring->ring[i++] = ring->funcs->nop;
 
 }
 
 #endif
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 6/6] drm/amdgpu: use LRU mapping policy for SDMA engines
       [not found] ` <20170421234836.18620-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (4 preceding siblings ...)
  2017-04-21 23:48   ` [PATCH 5/6] drm/amdgpu: guarantee bijective mapping of ring ids for LRU v3 Andres Rodriguez
@ 2017-04-21 23:48   ` Andres Rodriguez
  2017-04-24  7:40   ` [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2 Christian König
  6 siblings, 0 replies; 12+ messages in thread
From: Andres Rodriguez @ 2017-04-21 23:48 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Spreading the load across multiple SDMA engines can increase memory
transfer performance.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 5a7c691..e8984df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -241,38 +241,38 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
 	if (ring >= ip_num_rings) {
 		DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
 				ring, ip_num_rings, hw_ip);
 		return -EINVAL;
 	}
 
 	mutex_lock(&mapper->lock);
 
 	*out_ring = amdgpu_get_cached_map(mapper, ring);
 	if (*out_ring) {
 		/* cache hit */
 		r = 0;
 		goto out_unlock;
 	}
 
 	switch (mapper->hw_ip) {
 	case AMDGPU_HW_IP_GFX:
-	case AMDGPU_HW_IP_DMA:
 	case AMDGPU_HW_IP_UVD:
 	case AMDGPU_HW_IP_VCE:
 		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
 		break;
+	case AMDGPU_HW_IP_DMA:
 	case AMDGPU_HW_IP_COMPUTE:
 		r = amdgpu_lru_map(adev, mapper, ring, out_ring);
 		break;
 	default:
 		*out_ring = NULL;
 		r = -EINVAL;
 		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
 	}
 
 out_unlock:
 	mutex_unlock(&mapper->lock);
 	return r;
 }
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2
       [not found] ` <20170421234836.18620-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (5 preceding siblings ...)
  2017-04-21 23:48   ` [PATCH 6/6] drm/amdgpu: use LRU mapping policy for SDMA engines Andres Rodriguez
@ 2017-04-24  7:40   ` Christian König
       [not found]     ` <f7bf1964-c548-2cf5-3a46-b53cc7c80816-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  6 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2017-04-24  7:40 UTC (permalink / raw)
  To: Andres Rodriguez, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

I wanted to push that to our internal branch, but found that it doesn't 
apply cleanly. So please rebase on alex amd-staging-4.9 branch.

Additional to that you forgot my rb on patch #4.

Regards,
Christian.

Am 22.04.2017 um 01:48 schrieb Andres Rodriguez:
> V2 updates:
>   * Rebased
>   * All patches now have r-b
>
> Second part of the split of the series:
> Add support for high priority scheduling in amdgpu v8
>
> These patches should be close to being good enough to land.
>
> The first two patches are simple fixes I've ported from the ROCm branch. These
> still need review.
>
> I've fixed all of Christian's comments for patch 04:
> drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2
       [not found]     ` <f7bf1964-c548-2cf5-3a46-b53cc7c80816-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-04-24 15:25       ` Alex Deucher
       [not found]         ` <CADnq5_Pu0zNiVtf5dUAhYRyg2kuDsY8TNUQ5J14AAu7MRXXfuw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Alex Deucher @ 2017-04-24 15:25 UTC (permalink / raw)
  To: Christian König; +Cc: Andres Rodriguez, amd-gfx list

On Mon, Apr 24, 2017 at 3:40 AM, Christian König
<deathsimple@vodafone.de> wrote:
> I wanted to push that to our internal branch, but found that it doesn't
> apply cleanly. So please rebase on alex amd-staging-4.9 branch.

Please stick with my drm-next-wip tree.  It will make it easier for me
to integrate.

Alex


>
> Additional to that you forgot my rb on patch #4.
>
> Regards,
> Christian.
>
>
> Am 22.04.2017 um 01:48 schrieb Andres Rodriguez:
>>
>> V2 updates:
>>   * Rebased
>>   * All patches now have r-b
>>
>> Second part of the split of the series:
>> Add support for high priority scheduling in amdgpu v8
>>
>> These patches should be close to being good enough to land.
>>
>> The first two patches are simple fixes I've ported from the ROCm branch.
>> These
>> still need review.
>>
>> I've fixed all of Christian's comments for patch 04:
>> drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2
       [not found]         ` <CADnq5_Pu0zNiVtf5dUAhYRyg2kuDsY8TNUQ5J14AAu7MRXXfuw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-04-24 15:36           ` Andres Rodriguez
       [not found]             ` <d555fef1-afb7-6fc3-1b7f-30f7ed6f25fa-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Andres Rodriguez @ 2017-04-24 15:36 UTC (permalink / raw)
  To: Alex Deucher, Christian König; +Cc: amd-gfx list



On 2017-04-24 11:25 AM, Alex Deucher wrote:
> On Mon, Apr 24, 2017 at 3:40 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> I wanted to push that to our internal branch, but found that it doesn't
>> apply cleanly. So please rebase on alex amd-staging-4.9 branch.
>
> Please stick with my drm-next-wip tree.  It will make it easier for me
> to integrate.
>
> Alex
>
>

I'm guessing the tradeoff here would be a delay getting into drm-next. 
I'm okay with that as long as the patch series makes it in time for the 
4.13 feature merge window.

One exception, I would like to get the IOCTL interface patch submitted 
into a branch that will land on drm-next sooner rather than later so 
that I can begin submitting the corresponding patches to the userspace 
components. Should this be amd-staging-4.9? Or should it be somewhere else?

Regards,
Andres

>>
>> Additional to that you forgot my rb on patch #4.
>>
>> Regards,
>> Christian.
>>
>>
>> Am 22.04.2017 um 01:48 schrieb Andres Rodriguez:
>>>
>>> V2 updates:
>>>   * Rebased
>>>   * All patches now have r-b
>>>
>>> Second part of the split of the series:
>>> Add support for high priority scheduling in amdgpu v8
>>>
>>> These patches should be close to being good enough to land.
>>>
>>> The first two patches are simple fixes I've ported from the ROCm branch.
>>> These
>>> still need review.
>>>
>>> I've fixed all of Christian's comments for patch 04:
>>> drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2
       [not found]             ` <d555fef1-afb7-6fc3-1b7f-30f7ed6f25fa-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-24 22:33               ` Alex Deucher
  0 siblings, 0 replies; 12+ messages in thread
From: Alex Deucher @ 2017-04-24 22:33 UTC (permalink / raw)
  To: Andres Rodriguez; +Cc: Christian König, amd-gfx list

On Mon, Apr 24, 2017 at 11:36 AM, Andres Rodriguez <andresx7@gmail.com> wrote:
>
>
> On 2017-04-24 11:25 AM, Alex Deucher wrote:
>>
>> On Mon, Apr 24, 2017 at 3:40 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>>
>>> I wanted to push that to our internal branch, but found that it doesn't
>>> apply cleanly. So please rebase on alex amd-staging-4.9 branch.
>>
>>
>> Please stick with my drm-next-wip tree.  It will make it easier for me
>> to integrate.
>>
>> Alex
>>
>>
>
> I'm guessing the tradeoff here would be a delay getting into drm-next. I'm
> okay with that as long as the patch series makes it in time for the 4.13
> feature merge window.
>
> One exception, I would like to get the IOCTL interface patch submitted into
> a branch that will land on drm-next sooner rather than later so that I can
> begin submitting the corresponding patches to the userspace components.
> Should this be amd-staging-4.9? Or should it be somewhere else?

No real trade-offs per se.  The code either needs to get backported to
our stable branch or forward ported to drm-next.  It ultimately needs
to land in both to be properly tested, etc.

Alex


>
> Regards,
> Andres
>
>
>>>
>>> Additional to that you forgot my rb on patch #4.
>>>
>>> Regards,
>>> Christian.
>>>
>>>
>>> Am 22.04.2017 um 01:48 schrieb Andres Rodriguez:
>>>>
>>>>
>>>> V2 updates:
>>>>   * Rebased
>>>>   * All patches now have r-b
>>>>
>>>> Second part of the split of the series:
>>>> Add support for high priority scheduling in amdgpu v8
>>>>
>>>> These patches should be close to being good enough to land.
>>>>
>>>> The first two patches are simple fixes I've ported from the ROCm branch.
>>>> These
>>>> still need review.
>>>>
>>>> I've fixed all of Christian's comments for patch 04:
>>>> drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4
>>>>
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>
>>>
>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 6/6] drm/amdgpu: use LRU mapping policy for SDMA engines
       [not found] ` <20170413220436.21555-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-13 22:04   ` Andres Rodriguez
  0 siblings, 0 replies; 12+ messages in thread
From: Andres Rodriguez @ 2017-04-13 22:04 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Spreading the load across multiple SDMA engines can increase memory
transfer performance.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
index 5a7c691..e8984df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c
@@ -241,38 +241,38 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
 	if (ring >= ip_num_rings) {
 		DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n",
 				ring, ip_num_rings, hw_ip);
 		return -EINVAL;
 	}
 
 	mutex_lock(&mapper->lock);
 
 	*out_ring = amdgpu_get_cached_map(mapper, ring);
 	if (*out_ring) {
 		/* cache hit */
 		r = 0;
 		goto out_unlock;
 	}
 
 	switch (mapper->hw_ip) {
 	case AMDGPU_HW_IP_GFX:
-	case AMDGPU_HW_IP_DMA:
 	case AMDGPU_HW_IP_UVD:
 	case AMDGPU_HW_IP_VCE:
 		r = amdgpu_identity_map(adev, mapper, ring, out_ring);
 		break;
+	case AMDGPU_HW_IP_DMA:
 	case AMDGPU_HW_IP_COMPUTE:
 		r = amdgpu_lru_map(adev, mapper, ring, out_ring);
 		break;
 	default:
 		*out_ring = NULL;
 		r = -EINVAL;
 		DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
 	}
 
 out_unlock:
 	mutex_unlock(&mapper->lock);
 	return r;
 }
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-04-24 22:33 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-21 23:48 [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2 Andres Rodriguez
     [not found] ` <20170421234836.18620-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-21 23:48   ` [PATCH 1/6] drm/amdgpu: condense mqd programming sequence Andres Rodriguez
2017-04-21 23:48   ` [PATCH 2/6] drm/amdgpu: workaround tonga HW bug in HQD " Andres Rodriguez
2017-04-21 23:48   ` [PATCH 3/6] drm/amdgpu: untie user ring ids from kernel ring ids v4 Andres Rodriguez
2017-04-21 23:48   ` [PATCH 4/6] drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4 Andres Rodriguez
2017-04-21 23:48   ` [PATCH 5/6] drm/amdgpu: guarantee bijective mapping of ring ids for LRU v3 Andres Rodriguez
2017-04-21 23:48   ` [PATCH 6/6] drm/amdgpu: use LRU mapping policy for SDMA engines Andres Rodriguez
2017-04-24  7:40   ` [PATCH] LRU map compute/SDMA user ring ids to kernel ring ids v2 Christian König
     [not found]     ` <f7bf1964-c548-2cf5-3a46-b53cc7c80816-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-04-24 15:25       ` Alex Deucher
     [not found]         ` <CADnq5_Pu0zNiVtf5dUAhYRyg2kuDsY8TNUQ5J14AAu7MRXXfuw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-24 15:36           ` Andres Rodriguez
     [not found]             ` <d555fef1-afb7-6fc3-1b7f-30f7ed6f25fa-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-24 22:33               ` Alex Deucher
  -- strict thread matches above, loose matches on Subject: below --
2017-04-13 22:04 [PATCH split 2/3] LRU map compute/SDMA user ring ids to kernel ring ids Andres Rodriguez
     [not found] ` <20170413220436.21555-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-13 22:04   ` [PATCH 6/6] drm/amdgpu: use LRU mapping policy for SDMA engines Andres Rodriguez

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.