All of lore.kernel.org
 help / color / mirror / Atom feed
* Change queue/pipe split between amdkfd and amdgpu
@ 2017-02-04  4:51 Andres Rodriguez
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo

The current queue/pipe split policy is for amdgpu to take the first pipe of
MEC0 and leave the rest for amdkfd to use. This policy is taken as an
assumption in a few areas of the implementation.

This patch series aims to allow for flexible/tunable queue/pipe split policies
between kgd and kfd. It also updates the queue/pipe split policy to one that
allows better compute app concurrency for both drivers.

In the process some duplicate code and hardcoded constants were removed.

Any suggestions or feedback on improvements are welcome.

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 01/13] drm/amdgpu: refactor MQD/HQD initialization
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 02/13] drm/amdgpu: doorbell registers need only be set once Andres Rodriguez
                     ` (12 subsequent siblings)
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

The MQD programming sequence currently exists in 3 different places.
Refactor it to absorb all the duplicates.

The success path remains mostly identical except for a slightly
different order in the non-kiq case. This shouldn't matter if the HQD
is disabled.

The error handling paths have been updated to deal with the new code
structure.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 447 ++++++++++++++++++----------------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 417 +++++++++++--------------------
 2 files changed, 387 insertions(+), 477 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index e3589b5..0d7905e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -49,6 +49,8 @@
 
 #define GFX7_NUM_GFX_RINGS     1
 #define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE      2048
+
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -2791,8 +2793,6 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 	}
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
@@ -2810,7 +2810,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+				     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -2840,7 +2840,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 	}
 
 	/* clear memory.  Not sure if this is required or not */
-	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -2915,247 +2915,282 @@ struct bonaire_mqd
 	u32 interrupt_queue[64];
 };
 
-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe)
 {
-	int r, i, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
 	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct bonaire_mqd *mqd;
-	struct amdgpu_ring *ring;
-
-	/* fix up chicken bits */
-	tmp = RREG32(mmCP_CPF_DEBUG);
-	tmp |= (1 << 23);
-	WREG32(mmCP_CPF_DEBUG, tmp);
+	u32 tmp;
+	size_t eop_offset = me * pipe * GFX7_MEC_HPD_SIZE * 2;
 
-	/* init the pipes */
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
-		int me = (i < 4) ? 1 : 2;
-		int pipe = (i < 4) ? i : (i - 4);
+	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
 
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+	cik_srbm_select(adev, me, pipe, 0, 0);
 
-		cik_srbm_select(adev, me, pipe, 0, 0);
+	/* write the EOP addr */
+	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
-		/* write the EOP addr */
-		WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-		WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+	/* set the VMID assigned */
+	WREG32(mmCP_HPD_EOP_VMID, 0);
 
-		/* set the VMID assigned */
-		WREG32(mmCP_HPD_EOP_VMID, 0);
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
+	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
+	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
+	WREG32(mmCP_HPD_EOP_CONTROL, tmp);
 
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HPD_EOP_CONTROL);
-		tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
-		tmp |= order_base_2(MEC_HPD_SIZE / 8);
-		WREG32(mmCP_HPD_EOP_CONTROL, tmp);
-	}
 	cik_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
+}
 
-	/* init the queues.  Just two for now. */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
+static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
+{
+	int i;
 
-		if (ring->mqd_obj == NULL) {
-			r = amdgpu_bo_create(adev,
-					     sizeof(struct bonaire_mqd),
-					     PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
-					     &ring->mqd_obj);
-			if (r) {
-				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-				return r;
-			}
+	/* disable the queue if it's active */
+	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
 		}
 
-		r = amdgpu_bo_reserve(ring->mqd_obj, false);
-		if (unlikely(r != 0)) {
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-		if (r) {
-			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
+		if (i == adev->usec_timeout)
+			return -ETIMEDOUT;
 
-		/* init the mqd struct */
-		memset(buf, 0, sizeof(struct bonaire_mqd));
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+		WREG32(mmCP_HQD_PQ_RPTR, 0);
+		WREG32(mmCP_HQD_PQ_WPTR, 0);
+	}
 
-		mqd = (struct bonaire_mqd *)buf;
-		mqd->header = 0xC0310800;
-		mqd->static_thread_mgmt01[0] = 0xffffffff;
-		mqd->static_thread_mgmt01[1] = 0xffffffff;
-		mqd->static_thread_mgmt23[0] = 0xffffffff;
-		mqd->static_thread_mgmt23[1] = 0xffffffff;
+	return 0;
+}
 
-		mutex_lock(&adev->srbm_mutex);
-		cik_srbm_select(adev, ring->me,
-				ring->pipe,
-				ring->queue, 0);
+static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
+			     struct bonaire_mqd *mqd,
+			     uint64_t mqd_gpu_addr,
+			     struct amdgpu_ring *ring)
+{
+	u64 hqd_gpu_addr;
+	u64 wb_gpu_addr;
 
-		/* disable wptr polling */
-		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-		tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
-		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+	/* init the mqd struct */
+	memset(mqd, 0, sizeof(struct bonaire_mqd));
 
-		/* enable doorbell? */
-		mqd->queue_state.cp_hqd_pq_doorbell_control =
-			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		if (use_doorbell)
-			mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		else
-			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
-
-		/* disable the queue if it's active */
-		mqd->queue_state.cp_hqd_dequeue_request = 0;
-		mqd->queue_state.cp_hqd_pq_rptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr= 0;
-		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-					break;
-				udelay(1);
-			}
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
-			WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
-			WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		}
+	mqd->header = 0xC0310800;
+	mqd->static_thread_mgmt01[0] = 0xffffffff;
+	mqd->static_thread_mgmt01[1] = 0xffffffff;
+	mqd->static_thread_mgmt23[0] = 0xffffffff;
+	mqd->static_thread_mgmt23[1] = 0xffffffff;
 
-		/* set the pointer to the MQD */
-		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-		WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
-		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
-		/* set MQD vmid to 0 */
-		mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
-		mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
-		WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
-
-		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-		hqd_gpu_addr = ring->gpu_addr >> 8;
-		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
-		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-		WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
-		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
-
-		/* set up the HQD, this is similar to CP_RB0_CNTL */
-		mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
-					CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
-
-		mqd->queue_state.cp_hqd_pq_control |=
-			order_base_2(ring->ring_size / 8);
-		mqd->queue_state.cp_hqd_pq_control |=
-			(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
+	/* enable doorbell? */
+	mqd->queue_state.cp_hqd_pq_doorbell_control =
+		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+	if (ring->use_doorbell)
+		mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+	else
+		mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+
+	/* set the pointer to the MQD */
+	mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
+	mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+
+	/* set MQD vmid to 0 */
+	mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+	mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+
+	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+	hqd_gpu_addr = ring->gpu_addr >> 8;
+	mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
+	mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up the HQD, this is similar to CP_RB0_CNTL */
+	mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+	mqd->queue_state.cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
+				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
+
+	mqd->queue_state.cp_hqd_pq_control |=
+		order_base_2(ring->ring_size / 8);
+	mqd->queue_state.cp_hqd_pq_control |=
+		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
 #ifdef __BIG_ENDIAN
-		mqd->queue_state.cp_hqd_pq_control |=
-			2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
+	mqd->queue_state.cp_hqd_pq_control |=
+		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
 #endif
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
+	mqd->queue_state.cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
 				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
 				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
-		mqd->queue_state.cp_hqd_pq_control |=
-			CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
-			CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
-		WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
-
-		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
-
-		/* set the wb address wether it's enabled or not */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
-			upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
-
-		/* enable the doorbell if requested */
-		if (use_doorbell) {
-			mqd->queue_state.cp_hqd_pq_doorbell_control =
-				RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				(ring->doorbell_index <<
-				 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
-				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+	mqd->queue_state.cp_hqd_pq_control |=
+		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
+		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
 
-		} else {
-			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
+	mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* set the wb address wether it's enabled or not */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+	mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
+	mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* enable the doorbell if requested */
+	if (ring->use_doorbell) {
+		mqd->queue_state.cp_hqd_pq_doorbell_control =
+			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+		mqd->queue_state.cp_hqd_pq_doorbell_control &=
+			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
+		mqd->queue_state.cp_hqd_pq_doorbell_control |=
+			(ring->doorbell_index <<
+			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
+		mqd->queue_state.cp_hqd_pq_doorbell_control |=
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+		mqd->queue_state.cp_hqd_pq_doorbell_control &=
+			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
+					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+
+	} else {
+		mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+	}
+
+	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	ring->wptr = 0;
+	mqd->queue_state.cp_hqd_pq_wptr = ring->wptr;
+	mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
+	/* set the vmid for the queue */
+	mqd->queue_state.cp_hqd_vmid = 0;
+
+	/* activate the queue */
+	mqd->queue_state.cp_hqd_active = 1;
+}
+
+static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
+			       struct bonaire_mqd *mqd)
+{
+	u32 tmp;
+
+	/* disable wptr polling */
+	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+
+	/* program MQD field to HW */
+	WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
+	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
+	WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
+	WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
+	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
+	WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control);
+	WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
+	WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+
+	/* activate the HQD */
+	WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+
+	return 0;
+}
+
+static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
+{
+	int r;
+	u64 mqd_gpu_addr;
+	struct bonaire_mqd *mqd;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	if (ring->mqd_obj == NULL) {
+		r = amdgpu_bo_create(adev,
+				sizeof(struct bonaire_mqd),
+				PAGE_SIZE, true,
+				AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				&ring->mqd_obj);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
+			return r;
 		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
+	}
+
+	r = amdgpu_bo_reserve(ring->mqd_obj, false);
+	if (unlikely(r != 0))
+		goto out;
+
+	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
+			&mqd_gpu_addr);
+	if (r) {
+		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
+		goto out_unreserve;
+	}
+	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+	if (r) {
+		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
+		goto out_unreserve;
+	}
+
+	mutex_lock(&adev->srbm_mutex);
+	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
-		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-		ring->wptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr = ring->wptr;
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
+	gfx_v7_0_mqd_deactivate(adev);
+	gfx_v7_0_mqd_commit(adev, mqd);
 
-		/* set the vmid for the queue */
-		mqd->queue_state.cp_hqd_vmid = 0;
-		WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+	cik_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
 
-		/* activate the queue */
-		mqd->queue_state.cp_hqd_active = 1;
-		WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+	amdgpu_bo_kunmap(ring->mqd_obj);
+out_unreserve:
+	amdgpu_bo_unreserve(ring->mqd_obj);
+out:
+	return 0;
+}
+
+/**
+ * gfx_v7_0_cp_compute_resume - setup the compute queue registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program the compute queues and test them to make sure they
+ * are working.
+ * Returns 0 for success, error for failure.
+ */
+static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+	int r, i, j;
+	u32 tmp;
+	struct amdgpu_ring *ring;
 
-		cik_srbm_select(adev, 0, 0, 0, 0);
-		mutex_unlock(&adev->srbm_mutex);
+	/* fix up chicken bits */
+	tmp = RREG32(mmCP_CPF_DEBUG);
+	tmp |= (1 << 23);
+	WREG32(mmCP_CPF_DEBUG, tmp);
 
-		amdgpu_bo_kunmap(ring->mqd_obj);
-		amdgpu_bo_unreserve(ring->mqd_obj);
+	/* init the pipes */
+	for (i = 0; i < adev->gfx.mec.num_mec; i++)
+		for (j = 0; j < adev->gfx.mec.num_pipe; j++)
+			gfx_v7_0_compute_pipe_init(adev, i, j);
 
-		ring->ready = true;
+	/* init the queues */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		r = gfx_v7_0_compute_queue_init(adev, i);
+		if (r) {
+			gfx_v7_0_cp_compute_fini(adev);
+			return r;
+		}
 	}
 
 	gfx_v7_0_cp_compute_enable(adev, true);
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
-
+		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 			ring->ready = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 35f9cd8..cf738e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -53,6 +53,8 @@
 
 #define GFX8_NUM_GFX_RINGS     1
 #define GFX8_NUM_COMPUTE_RINGS 8
+#define GFX8_MEC_HPD_SIZE 2048
+
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
@@ -1416,7 +1418,7 @@ static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
 	irq->data = NULL;
 }
 
-#define MEC_HPD_SIZE 2048
+#define GFX8_MEC_HPD_SIZE 2048
 
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
@@ -1433,7 +1435,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
+				     adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -1462,7 +1464,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
+	memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -1484,7 +1486,7 @@ static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
 	u32 *hpd;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
-	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
+	r = amdgpu_bo_create_kernel(adev, GFX8_MEC_HPD_SIZE, PAGE_SIZE,
 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
 				    &kiq->eop_gpu_addr, (void **)&hpd);
 	if (r) {
@@ -1492,7 +1494,7 @@ static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	memset(hpd, 0, MEC_HPD_SIZE);
+	memset(hpd, 0, GFX8_MEC_HPD_SIZE);
 
 	amdgpu_bo_kunmap(kiq->eop_obj);
 
@@ -4657,6 +4659,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
 	uint32_t tmp;
 
+	/* init the mqd struct */
+	memset(mqd, 0, sizeof(struct vi_mqd));
+
 	mqd->header = 0xC0310800;
 	mqd->compute_pipelinestat_enable = 0x00000001;
 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
@@ -4672,7 +4677,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-			(order_base_2(MEC_HPD_SIZE / 4) - 1));
+			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
 
 	mqd->cp_hqd_eop_control = tmp;
 
@@ -4688,11 +4693,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
 
 	mqd->cp_hqd_pq_doorbell_control = tmp;
 
-	/* disable the queue if it's active */
-	mqd->cp_hqd_dequeue_request = 0;
-	mqd->cp_hqd_pq_rptr = 0;
-	mqd->cp_hqd_pq_wptr = 0;
-
 	/* set the pointer to the MQD */
 	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
@@ -4768,12 +4768,54 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
-				      struct vi_mqd *mqd,
-				      struct amdgpu_ring *ring)
+static int gfx_v8_0_mqd_deactivate(struct amdgpu_device *adev)
+{
+	int i;
+
+	/* disable the queue if it's active */
+	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+
+		if (i == adev->usec_timeout)
+			return -ETIMEDOUT;
+
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+		WREG32(mmCP_HQD_PQ_RPTR, 0);
+		WREG32(mmCP_HQD_PQ_WPTR, 0);
+	}
+
+	return 0;
+}
+
+static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
+{
+	uint32_t tmp;
+
+	if (!enable)
+		return;
+
+	if ((adev->asic_type == CHIP_CARRIZO) ||
+			(adev->asic_type == CHIP_FIJI) ||
+			(adev->asic_type == CHIP_STONEY) ||
+			(adev->asic_type == CHIP_POLARIS11) ||
+			(adev->asic_type == CHIP_POLARIS10)) {
+		WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2);
+		WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2);
+	}
+
+	tmp = RREG32(mmCP_PQ_STATUS);
+	tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
+	WREG32(mmCP_PQ_STATUS, tmp);
+}
+
+static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
 {
 	uint32_t tmp;
-	int j;
 
 	/* disable wptr polling */
 	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
@@ -4789,18 +4831,10 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
 	/* enable doorbell? */
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
 
-	/* disable the queue if it's active */
-	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-				break;
-			udelay(1);
-		}
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-	}
+	/* set pq read/write pointers */
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
+	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
+	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
 
 	/* set the pointer to the MQD */
 	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
@@ -4827,16 +4861,6 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
 
 	/* enable the doorbell if requested */
-	if (ring->use_doorbell) {
-		if ((adev->asic_type == CHIP_CARRIZO) ||
-				(adev->asic_type == CHIP_FIJI) ||
-				(adev->asic_type == CHIP_STONEY)) {
-			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
-						AMDGPU_DOORBELL_KIQ << 2);
-			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
-						AMDGPU_DOORBELL_MEC_RING7 << 2);
-		}
-	}
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
@@ -4850,16 +4874,10 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
 	/* activate the queue */
 	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
-	if (ring->use_doorbell) {
-		tmp = RREG32(mmCP_PQ_STATUS);
-		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
-		WREG32(mmCP_PQ_STATUS, tmp);
-	}
-
 	return 0;
 }
 
-static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
+static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring,
 				   struct vi_mqd *mqd,
 				   u64 mqd_gpu_addr)
 {
@@ -4876,15 +4894,18 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
 		gfx_v8_0_kiq_setting(&kiq->ring);
 	} else
 		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
-					ring->queue * MEC_HPD_SIZE;
+					ring->queue * GFX8_MEC_HPD_SIZE;
 
 	mutex_lock(&adev->srbm_mutex);
 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
 	gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
-	if (is_kiq)
-		gfx_v8_0_kiq_init_register(adev, mqd, ring);
+	if (is_kiq) {
+		gfx_v8_0_mqd_deactivate(adev);
+		gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
+		gfx_v8_0_mqd_commit(adev, mqd);
+	}
 
 	vi_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
@@ -4921,9 +4942,10 @@ static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev,
 	u32 *buf;
 	int r = 0;
 
-	r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
-				    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
-				    &mqd_gpu_addr, (void **)&buf);
+	r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd),
+			PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+			&ring->mqd_obj, &mqd_gpu_addr,
+			(void **)&buf);
 	if (r) {
 		dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
 		return r;
@@ -4933,7 +4955,7 @@ static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev,
 	memset(buf, 0, sizeof(struct vi_mqd));
 	mqd = (struct vi_mqd *)buf;
 
-	r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr);
+	r = gfx_v8_0_kiq_queue_init(ring, mqd, mqd_gpu_addr);
 	if (r)
 		return r;
 
@@ -4979,246 +5001,99 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
-static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
+static int gfx_v8_0_compute_queue_init(struct amdgpu_device *adev,
+				       int ring_id)
 {
-	int r, i, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
+	int r;
 	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
+	u64 mqd_gpu_addr;
 	struct vi_mqd *mqd;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
 
-	/* init the queues.  */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		if (ring->mqd_obj == NULL) {
-			r = amdgpu_bo_create(adev,
-					     sizeof(struct vi_mqd),
-					     PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
-					     NULL, &ring->mqd_obj);
-			if (r) {
-				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-				return r;
-			}
-		}
-
-		r = amdgpu_bo_reserve(ring->mqd_obj, false);
-		if (unlikely(r != 0)) {
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-			gfx_v8_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
+	if (ring->mqd_obj == NULL) {
+		r = amdgpu_bo_create(adev,
+				sizeof(struct vi_mqd),
+				PAGE_SIZE, true,
+				AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
+				NULL, &ring->mqd_obj);
 		if (r) {
-			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-			gfx_v8_0_cp_compute_fini(adev);
+			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
 			return r;
 		}
+	}
 
-		/* init the mqd struct */
-		memset(buf, 0, sizeof(struct vi_mqd));
-
-		mqd = (struct vi_mqd *)buf;
-		mqd->header = 0xC0310800;
-		mqd->compute_pipelinestat_enable = 0x00000001;
-		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
-		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
-		mqd->compute_misc_reserved = 0x00000003;
-
-		mutex_lock(&adev->srbm_mutex);
-		vi_srbm_select(adev, ring->me,
-			       ring->pipe,
-			       ring->queue, 0);
-
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
-		eop_gpu_addr >>= 8;
-
-		/* write the EOP addr */
-		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
-		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
-
-		/* set the VMID assigned */
-		WREG32(mmCP_HQD_VMID, 0);
-
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
-		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
-
-		/* disable wptr polling */
-		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
-		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
-
-		mqd->cp_hqd_eop_base_addr_lo =
-			RREG32(mmCP_HQD_EOP_BASE_ADDR);
-		mqd->cp_hqd_eop_base_addr_hi =
-			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
-
-		/* enable doorbell? */
-		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		if (use_doorbell) {
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-		} else {
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
-		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
-		mqd->cp_hqd_pq_doorbell_control = tmp;
-
-		/* disable the queue if it's active */
-		mqd->cp_hqd_dequeue_request = 0;
-		mqd->cp_hqd_pq_rptr = 0;
-		mqd->cp_hqd_pq_wptr= 0;
-		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-					break;
-				udelay(1);
-			}
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-		}
+	r = amdgpu_bo_reserve(ring->mqd_obj, false);
+	if (unlikely(r != 0))
+		goto out;
 
-		/* set the pointer to the MQD */
-		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
-		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
-		/* set MQD vmid to 0 */
-		tmp = RREG32(mmCP_MQD_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
-		WREG32(mmCP_MQD_CONTROL, tmp);
-		mqd->cp_mqd_control = tmp;
-
-		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-		hqd_gpu_addr = ring->gpu_addr >> 8;
-		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
-		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
-		/* set up the HQD, this is similar to CP_RB0_CNTL */
-		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
-				    (order_base_2(ring->ring_size / 4) - 1));
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
-#ifdef __BIG_ENDIAN
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
-#endif
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
-		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
-		mqd->cp_hqd_pq_control = tmp;
-
-		/* set the wb address wether it's enabled or not */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
-		mqd->cp_hqd_pq_rptr_report_addr_hi =
-			upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		       mqd->cp_hqd_pq_rptr_report_addr_lo);
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		       mqd->cp_hqd_pq_rptr_report_addr_hi);
-
-		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
-		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		       mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
-		/* enable the doorbell if requested */
-		if (use_doorbell) {
-			if ((adev->asic_type == CHIP_CARRIZO) ||
-			    (adev->asic_type == CHIP_FIJI) ||
-			    (adev->asic_type == CHIP_STONEY) ||
-			    (adev->asic_type == CHIP_POLARIS11) ||
-			    (adev->asic_type == CHIP_POLARIS10) ||
-			    (adev->asic_type == CHIP_POLARIS12)) {
-				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
-				       AMDGPU_DOORBELL_KIQ << 2);
-				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
-				       AMDGPU_DOORBELL_MEC_RING7 << 2);
-			}
-			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-					    DOORBELL_OFFSET, ring->doorbell_index);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
-			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
-			mqd->cp_hqd_pq_doorbell_control = tmp;
+	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
+			&mqd_gpu_addr);
+	if (r) {
+		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
+		goto out_unreserve;
+	}
+	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+	if (r) {
+		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
+		goto out_unreserve;
+	}
 
-		} else {
-			mqd->cp_hqd_pq_doorbell_control = 0;
-		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->cp_hqd_pq_doorbell_control);
-
-		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-		ring->wptr = 0;
-		mqd->cp_hqd_pq_wptr = ring->wptr;
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
-
-		/* set the vmid for the queue */
-		mqd->cp_hqd_vmid = 0;
-		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
-		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
-		mqd->cp_hqd_persistent_state = tmp;
-		if (adev->asic_type == CHIP_STONEY ||
-			adev->asic_type == CHIP_POLARIS11 ||
-			adev->asic_type == CHIP_POLARIS10 ||
-			adev->asic_type == CHIP_POLARIS12) {
-			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
-			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
-			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
-		}
+	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE);
+	eop_gpu_addr >>= 8;
+
+	/* init the mqd struct */
+	memset(mqd, 0, sizeof(struct vi_mqd));
+
+	mutex_lock(&adev->srbm_mutex);
+	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+	gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
-		/* activate the queue */
-		mqd->cp_hqd_active = 1;
-		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
+	gfx_v8_0_mqd_deactivate(adev);
+	gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
+	gfx_v8_0_mqd_commit(adev, mqd);
 
-		vi_srbm_select(adev, 0, 0, 0, 0);
-		mutex_unlock(&adev->srbm_mutex);
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	amdgpu_bo_kunmap(ring->mqd_obj);
+out_unreserve:
+	amdgpu_bo_unreserve(ring->mqd_obj);
+out:
+	return r;
+}
 
-		amdgpu_bo_kunmap(ring->mqd_obj);
-		amdgpu_bo_unreserve(ring->mqd_obj);
+static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+	int r, i;
+	u32 tmp;
+	struct amdgpu_ring *ring;
+
+	/* Starting with gfxv8, all the pipe specific state was removed
+	 * The fields have been moved to be per-HQD now. */
+
+	/* init the queues */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		r = gfx_v8_0_compute_queue_init(adev, i);
+		if (r) {
+			gfx_v8_0_cp_compute_fini(adev);
+			return r;
+		}
 	}
 
-	if (use_doorbell) {
-		tmp = RREG32(mmCP_PQ_STATUS);
-		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
-		WREG32(mmCP_PQ_STATUS, tmp);
+	if (adev->asic_type == CHIP_STONEY ||
+	    adev->asic_type == CHIP_POLARIS11 ||
+	    adev->asic_type == CHIP_POLARIS10 ||
+	    adev->asic_type == CHIP_POLARIS12) {
+		tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
+		tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
+		WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
 	}
 
 	gfx_v8_0_cp_compute_enable(adev, true);
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+		ring = &adev->gfx.compute_ring[i];
 
 		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 02/13] drm/amdgpu: doorbell registers need only be set once
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-02-04  4:51   ` [PATCH 01/13] drm/amdgpu: refactor MQD/HQD initialization Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
       [not found]     ` <20170204045142.5596-3-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-02-04  4:51   ` [PATCH 03/13] drm/amdgpu: detect timeout error when deactivating hqd Andres Rodriguez
                     ` (11 subsequent siblings)
  13 siblings, 1 reply; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

The CP_MEC_DOORBELL_RANGE_* and CP_PQ_STATUS.DOORBELL_ENABLE registers
are not HQD specific.

They only need to be set once if at least 1 pipe requested doorbell
support.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 147ce0e..9740800 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1427,6 +1427,7 @@ struct amdgpu_device {
 	unsigned			num_rings;
 	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
 	bool				ib_pool_ready;
+	bool				doorbell_enabled;
 	struct amdgpu_sa_manager	ring_tmp_bo;
 
 	/* interrupts */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index cf738e5..5d0e2c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4796,7 +4796,7 @@ static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
 {
 	uint32_t tmp;
 
-	if (!enable)
+	if (!enable || adev->doorbell_enabled)
 		return;
 
 	if ((adev->asic_type == CHIP_CARRIZO) ||
@@ -4811,6 +4811,8 @@ static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
 	tmp = RREG32(mmCP_PQ_STATUS);
 	tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
 	WREG32(mmCP_PQ_STATUS, tmp);
+
+	adev->doorbell_enabled = true;
 }
 
 static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
@@ -5108,6 +5110,8 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
 {
 	int r;
 
+	adev->doorbell_enabled = false;
+
 	if (!(adev->flags & AMD_IS_APU))
 		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
 
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 03/13] drm/amdgpu: detect timeout error when deactivating hqd
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-02-04  4:51   ` [PATCH 01/13] drm/amdgpu: refactor MQD/HQD initialization Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 02/13] drm/amdgpu: doorbell registers need only be set once Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 04/13] drm/amdgpu: remove duplicate definition of cik_mqd Andres Rodriguez
                     ` (10 subsequent siblings)
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

Handle HQD deactivation timeouts instead of ignoring them.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5d0e2c8..442cd66 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4883,6 +4883,7 @@ static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring,
 				   struct vi_mqd *mqd,
 				   u64 mqd_gpu_addr)
 {
+	int r = 0;
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	uint64_t eop_gpu_addr;
@@ -4904,7 +4905,12 @@ static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring,
 	gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
 	if (is_kiq) {
-		gfx_v8_0_mqd_deactivate(adev);
+		r = gfx_v8_0_mqd_deactivate(adev);
+		if (r) {
+			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
+			goto out_unlock;
+		}
+
 		gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
 		gfx_v8_0_mqd_commit(adev, mqd);
 	}
@@ -4918,6 +4924,12 @@ static int gfx_v8_0_kiq_queue_init(struct amdgpu_ring *ring,
 		gfx_v8_0_map_queue_enable(&kiq->ring, ring);
 
 	return 0;
+
+out_unlock:
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	return r;
 }
 
 static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
@@ -5051,10 +5063,16 @@ static int gfx_v8_0_compute_queue_init(struct amdgpu_device *adev,
 
 	gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
 
-	gfx_v8_0_mqd_deactivate(adev);
+	r = gfx_v8_0_mqd_deactivate(adev);
+	if (r) {
+		dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
+		goto out_unlock;
+	}
+
 	gfx_v8_0_enable_doorbell(adev, ring->use_doorbell);
 	gfx_v8_0_mqd_commit(adev, mqd);
 
+out_unlock:
 	vi_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
 
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 04/13] drm/amdgpu: remove duplicate definition of cik_mqd
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 03/13] drm/amdgpu: detect timeout error when deactivating hqd Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 05/13] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu Andres Rodriguez
                     ` (9 subsequent siblings)
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

The gfxv7 contains a slightly different version of cik_mqd called
bonaire_mqd. This can introduce subtle bugs if fixes are not applied in
both places.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 135 ++++++++++++++--------------------
 1 file changed, 54 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 0d7905e..4a279bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -27,6 +27,7 @@
 #include "amdgpu_gfx.h"
 #include "cikd.h"
 #include "cik.h"
+#include "cik_structs.h"
 #include "atom.h"
 #include "amdgpu_ucode.h"
 #include "clearstate_ci.h"
@@ -2887,34 +2888,6 @@ struct hqd_registers
 	u32 cp_mqd_control;
 };
 
-struct bonaire_mqd
-{
-	u32 header;
-	u32 dispatch_initiator;
-	u32 dimensions[3];
-	u32 start_idx[3];
-	u32 num_threads[3];
-	u32 pipeline_stat_enable;
-	u32 perf_counter_enable;
-	u32 pgm[2];
-	u32 tba[2];
-	u32 tma[2];
-	u32 pgm_rsrc[2];
-	u32 vmid;
-	u32 resource_limits;
-	u32 static_thread_mgmt01[2];
-	u32 tmp_ring_size;
-	u32 static_thread_mgmt23[2];
-	u32 restart[3];
-	u32 thread_trace_enable;
-	u32 reserved1;
-	u32 user_data[16];
-	u32 vgtcs_invoke_count[2];
-	struct hqd_registers queue_state;
-	u32 dequeue_cntr;
-	u32 interrupt_queue[64];
-};
-
 static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe)
 {
 	u64 eop_gpu_addr;
@@ -2968,7 +2941,7 @@ static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
 }
 
 static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
-			     struct bonaire_mqd *mqd,
+			     struct cik_mqd *mqd,
 			     uint64_t mqd_gpu_addr,
 			     struct amdgpu_ring *ring)
 {
@@ -2976,101 +2949,101 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
 	u64 wb_gpu_addr;
 
 	/* init the mqd struct */
-	memset(mqd, 0, sizeof(struct bonaire_mqd));
+	memset(mqd, 0, sizeof(struct cik_mqd));
 
 	mqd->header = 0xC0310800;
-	mqd->static_thread_mgmt01[0] = 0xffffffff;
-	mqd->static_thread_mgmt01[1] = 0xffffffff;
-	mqd->static_thread_mgmt23[0] = 0xffffffff;
-	mqd->static_thread_mgmt23[1] = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 
 	/* enable doorbell? */
-	mqd->queue_state.cp_hqd_pq_doorbell_control =
+	mqd->cp_hqd_pq_doorbell_control =
 		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
 	if (ring->use_doorbell)
-		mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
 	else
-		mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
 
 	/* set the pointer to the MQD */
-	mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
-	mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
 
 	/* set MQD vmid to 0 */
-	mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
-	mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
 
 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
 	hqd_gpu_addr = ring->gpu_addr >> 8;
-	mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
-	mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
-	mqd->queue_state.cp_hqd_pq_control &=
+	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+	mqd->cp_hqd_pq_control &=
 		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
 				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
 
-	mqd->queue_state.cp_hqd_pq_control |=
+	mqd->cp_hqd_pq_control |=
 		order_base_2(ring->ring_size / 8);
-	mqd->queue_state.cp_hqd_pq_control |=
+	mqd->cp_hqd_pq_control |=
 		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
 #ifdef __BIG_ENDIAN
-	mqd->queue_state.cp_hqd_pq_control |=
+	mqd->cp_hqd_pq_control |=
 		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
 #endif
-	mqd->queue_state.cp_hqd_pq_control &=
+	mqd->cp_hqd_pq_control &=
 		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
 				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
 				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
-	mqd->queue_state.cp_hqd_pq_control |=
+	mqd->cp_hqd_pq_control |=
 		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
 		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
 
 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
-	mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
 
 	/* set the wb address wether it's enabled or not */
 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-	mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
-	mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
+	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_rptr_report_addr_hi =
 		upper_32_bits(wb_gpu_addr) & 0xffff;
 
 	/* enable the doorbell if requested */
 	if (ring->use_doorbell) {
-		mqd->queue_state.cp_hqd_pq_doorbell_control =
+		mqd->cp_hqd_pq_doorbell_control =
 			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		mqd->queue_state.cp_hqd_pq_doorbell_control &=
+		mqd->cp_hqd_pq_doorbell_control &=
 			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
-		mqd->queue_state.cp_hqd_pq_doorbell_control |=
+		mqd->cp_hqd_pq_doorbell_control |=
 			(ring->doorbell_index <<
 			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
-		mqd->queue_state.cp_hqd_pq_doorbell_control |=
+		mqd->cp_hqd_pq_doorbell_control |=
 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		mqd->queue_state.cp_hqd_pq_doorbell_control &=
+		mqd->cp_hqd_pq_doorbell_control &=
 			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
 					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
 
 	} else {
-		mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+		mqd->cp_hqd_pq_doorbell_control = 0;
 	}
 
 	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	ring->wptr = 0;
-	mqd->queue_state.cp_hqd_pq_wptr = ring->wptr;
-	mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	mqd->cp_hqd_pq_wptr = ring->wptr;
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
 
 	/* set the vmid for the queue */
-	mqd->queue_state.cp_hqd_vmid = 0;
+	mqd->cp_hqd_vmid = 0;
 
 	/* activate the queue */
-	mqd->queue_state.cp_hqd_active = 1;
+	mqd->cp_hqd_active = 1;
 }
 
 static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
-			       struct bonaire_mqd *mqd)
+			       struct cik_mqd *mqd)
 {
 	u32 tmp;
 
@@ -3080,22 +3053,22 @@ static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
 	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
 	/* program MQD field to HW */
-	WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
-	WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
-	WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
-	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control);
-	WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-	WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
+	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
+	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
 	/* activate the HQD */
-	WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 	return 0;
 }
@@ -3104,12 +3077,12 @@ static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
 {
 	int r;
 	u64 mqd_gpu_addr;
-	struct bonaire_mqd *mqd;
+	struct cik_mqd *mqd;
 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
 
 	if (ring->mqd_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				sizeof(struct bonaire_mqd),
+				sizeof(struct cik_mqd),
 				PAGE_SIZE, true,
 				AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				&ring->mqd_obj);
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 05/13] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 04/13] drm/amdgpu: remove duplicate definition of cik_mqd Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
       [not found]     ` <20170204045142.5596-6-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-02-04  4:51   ` [PATCH 06/13] drm/amdgpu: rename rdev to adev Andres Rodriguez
                     ` (8 subsequent siblings)
  13 siblings, 1 reply; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

Use the same gfx_*_mqd_commit function for kfd and amdgpu codepaths.

This removes the last duplicates of this programming sequence.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 51 ++---------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 49 ++--------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c             | 38 ++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h             |  5 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c             | 44 ++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h             |  5 +++
 6 files changed, 97 insertions(+), 95 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1a0a5f7..038b7ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -29,6 +29,7 @@
 #include "cikd.h"
 #include "cik_sdma.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
@@ -309,55 +310,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	m = get_mqd(mqd);
 
 	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-
-	acquire_queue(kgd, pipe_id, queue_id);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-	WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
-	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
-
-	WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
-
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
-	WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
-
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
-
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-
-	WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
-
 	if (is_wptr_shadow_valid)
-		WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
+		m->cp_hqd_pq_wptr = wptr_shadow;
 
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v7_0_mqd_commit(adev, m);
 	release_queue(kgd);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6697612..2ecef3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -28,6 +28,7 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v8_0.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
@@ -251,53 +252,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 	m = get_mqd(mqd);
 
 	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-	acquire_queue(kgd, pipe_id, queue_id);
-
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
 	if (valid_wptr > 0)
-		WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
-
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
-	WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
-	WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
-	WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
-	WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
-
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
-	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
-	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
-	WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
-	WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
-	WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
-	WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
-	WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
-	WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
-
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+		m->cp_hqd_pq_wptr = valid_wptr;
 
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v8_0_mqd_commit(adev, mqd);
 	release_queue(kgd);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 4a279bb..d226804 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3038,12 +3038,29 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
 	/* set the vmid for the queue */
 	mqd->cp_hqd_vmid = 0;
 
+	/* defaults */
+	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
+	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
+	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
+	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
+	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
+	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
+	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
+	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
+	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
+	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
+	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
+
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 }
 
-static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
-			       struct cik_mqd *mqd)
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
 {
 	u32 tmp;
 
@@ -3067,6 +3084,23 @@ static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
 	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
 	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
+	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
+	WREG32(mmCP_HQD_IB_BASE_ADDR, mqd->cp_hqd_ib_base_addr_lo);
+	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, mqd->cp_hqd_ib_base_addr_hi);
+	WREG32(mmCP_HQD_IB_RPTR, mqd->cp_hqd_ib_rptr);
+	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+	WREG32(mmCP_HQD_SEMA_CMD, mqd->cp_hqd_sema_cmd);
+	WREG32(mmCP_HQD_MSG_TYPE, mqd->cp_hqd_msg_type);
+	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, mqd->cp_hqd_atomic0_preop_lo);
+	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, mqd->cp_hqd_atomic0_preop_hi);
+	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, mqd->cp_hqd_atomic1_preop_lo);
+	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, mqd->cp_hqd_atomic1_preop_hi);
+	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
+	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
+	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
+	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
+	WREG32(mmCP_HQD_IQ_RPTR, mqd->cp_hqd_iq_rptr);
+
 	/* activate the HQD */
 	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
index 2f5164c..6fb9c15 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
@@ -29,4 +29,9 @@ extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block;
 
+struct amdgpu_device;
+struct cik_mqd;
+
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 442cd66..7755d58 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4762,6 +4762,26 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
 	mqd->cp_hqd_persistent_state = tmp;
 
+	/* defaults */
+	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
+	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
+	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
+	mqd->cp_hqd_ctx_save_control = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
+	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
+	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
+	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
+	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
+	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
+	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
+	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
+	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
+	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
+
+
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 
@@ -4815,7 +4835,7 @@ static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
 	adev->doorbell_enabled = true;
 }
 
-static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
 {
 	uint32_t tmp;
 
@@ -4867,6 +4887,28 @@ static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+	WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+	WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+
+	/* set the HQD priority */
+	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
+	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
+	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
+
+	/* set cwsr save area */
+	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, mqd->cp_hqd_ctx_save_base_addr_lo);
+	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, mqd->cp_hqd_ctx_save_base_addr_hi);
+	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, mqd->cp_hqd_ctx_save_control);
+	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, mqd->cp_hqd_cntl_stack_offset);
+	WREG32(mmCP_HQD_CNTL_STACK_SIZE, mqd->cp_hqd_cntl_stack_size);
+	WREG32(mmCP_HQD_WG_STATE_OFFSET, mqd->cp_hqd_wg_state_offset);
+	WREG32(mmCP_HQD_CTX_SAVE_SIZE, mqd->cp_hqd_ctx_save_size);
+
+	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
+	WREG32(mmCP_HQD_EOP_EVENTS, mqd->cp_hqd_eop_done_events);
+	WREG32(mmCP_HQD_ERROR, mqd->cp_hqd_error);
+	WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
+	WREG32(mmCP_HQD_EOP_DONES, mqd->cp_hqd_eop_dones);
 
 	/* set the vmid for the queue */
 	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
index 788cc3a..ec3f11f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
@@ -27,4 +27,9 @@
 extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block;
 
+struct amdgpu_device;
+struct vi_mqd;
+
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd);
+
 #endif
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 06/13] drm/amdgpu: rename rdev to adev
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (4 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 05/13] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 07/13] drm/amdgpu: take ownership of per-pipe configuration Andres Rodriguez
                     ` (7 subsequent siblings)
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

Rename straggler instances of r(adeon)dev to a(mdgpu)dev

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 70 +++++++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 14 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c      |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      |  2 +-
 4 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index dba8a5b..3200ff9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -60,9 +60,9 @@ int amdgpu_amdkfd_init(void)
 	return ret;
 }
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 {
-	switch (rdev->asic_type) {
+	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_KAVERI:
 		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
@@ -86,16 +86,16 @@ void amdgpu_amdkfd_fini(void)
 	}
 }
 
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
 	if (kgd2kfd)
-		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
-					rdev->pdev, kfd2kgd);
+		adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+					adev->pdev, kfd2kgd);
 }
 
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-	if (rdev->kfd) {
+	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
 
@@ -103,42 +103,42 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
 			.compute_pipe_count = 4 - 1,
 		};
 
-		amdgpu_doorbell_get_kfd_info(rdev,
+		amdgpu_doorbell_get_kfd_info(adev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
-		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
 }
 
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
-	if (rdev->kfd) {
-		kgd2kfd->device_exit(rdev->kfd);
-		rdev->kfd = NULL;
+	if (adev->kfd) {
+		kgd2kfd->device_exit(adev->kfd);
+		adev->kfd = NULL;
 	}
 }
 
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 		const void *ih_ring_entry)
 {
-	if (rdev->kfd)
-		kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+	if (adev->kfd)
+		kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
 }
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
 {
-	if (rdev->kfd)
-		kgd2kfd->suspend(rdev->kfd);
+	if (adev->kfd)
+		kgd2kfd->suspend(adev->kfd);
 }
 
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
 {
 	int r = 0;
 
-	if (rdev->kfd)
-		r = kgd2kfd->resume(rdev->kfd);
+	if (adev->kfd)
+		r = kgd2kfd->resume(adev->kfd);
 
 	return r;
 }
@@ -147,7 +147,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **cpu_ptr)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
 	int r;
 
@@ -159,10 +159,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	if ((*mem) == NULL)
 		return -ENOMEM;
 
-	r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
 			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
 		return r;
 	}
@@ -170,21 +170,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 	/* map the buffer */
 	r = amdgpu_bo_reserve((*mem)->bo, true);
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
 		goto allocate_mem_reserve_bo_failed;
 	}
 
 	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
 				&(*mem)->gpu_addr);
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
 		goto allocate_mem_pin_bo_failed;
 	}
 	*gpu_addr = (*mem)->gpu_addr;
 
 	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"(%d) failed to map bo to kernel for amdkfd\n", r);
 		goto allocate_mem_kmap_bo_failed;
 	}
@@ -220,27 +220,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 
 uint64_t get_vmem_size(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev =
+	struct amdgpu_device *adev =
 		(struct amdgpu_device *)kgd;
 
 	BUG_ON(kgd == NULL);
 
-	return rdev->mc.real_vram_size;
+	return adev->mc.real_vram_size;
 }
 
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	if (rdev->gfx.funcs->get_gpu_clock_counter)
-		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
+	if (adev->gfx.funcs->get_gpu_clock_counter)
+		return adev->gfx.funcs->get_gpu_clock_counter(adev);
 	return 0;
 }
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
 	/* The sclk is in quantas of 10kHz */
-	return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+	return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index de530f68d..73f83a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -39,15 +39,15 @@ struct kgd_mem {
 int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev);
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 			const void *ih_ring_entry);
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index d226804..65653b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1842,7 +1842,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
 /**
  * gmc_v7_0_init_compute_vmid - gart enable
  *
- * @rdev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer
  *
  * Initialize compute vmid sh_mem registers
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 7755d58..4d40f57 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3799,7 +3799,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
 /**
  * gfx_v8_0_init_compute_vmid - gart enable
  *
- * @rdev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer
  *
  * Initialize compute vmid sh_mem registers
  *
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 07/13] drm/amdgpu: take ownership of per-pipe configuration
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (5 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 06/13] drm/amdgpu: rename rdev to adev Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 08/13] drm/radeon: take ownership of pipe initialization Andres Rodriguez
                     ` (6 subsequent siblings)
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

Make amdgpu the owner of all per-pipe state of the HQDs.

This change will allow us to split the queues between kfd and amdgpu
with a queue granularity instead of pipe granularity.

This patch fixes kfd allocating an HPD_EOP region for its 3 pipes which
goes unused.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h                |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  | 13 +------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c              | 28 ++++++++++----
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c              | 33 +++++++++++-----
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 45 ----------------------
 6 files changed, 49 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9740800..eeffd00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -776,9 +776,9 @@ struct amdgpu_rlc {
 struct amdgpu_mec {
 	struct amdgpu_bo	*hpd_eop_obj;
 	u64			hpd_eop_gpu_addr;
-	u32 num_pipe;
 	u32 num_mec;
-	u32 num_queue;
+	u32 num_pipe_per_mec;
+	u32 num_queue_per_pipe;
 };
 
 struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 038b7ea..910f9d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -244,18 +244,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-	lock_srbm(kgd, mec, pipe, 0, 0);
-	WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_VMID, 0);
-	WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
-	unlock_srbm(kgd);
-
+	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 2ecef3d..5843368 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -206,6 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
+	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 65653b3..cc98d71 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2798,6 +2798,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
+	size_t mec_hpd_size;
 
 	/*
 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
@@ -2805,13 +2806,26 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 	 * Nonetheless, we assign only 1 pipe because all other pipes will
 	 * be handled by KFD
 	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	switch (adev->asic_type) {
+	case CHIP_KAVERI:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
 
+	mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
+		* GFX7_MEC_HPD_SIZE * 2;
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -2841,7 +2855,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 	}
 
 	/* clear memory.  Not sure if this is required or not */
-	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2);
+	memset(hpd, 0, mec_hpd_size);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -3179,9 +3193,9 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
 	tmp |= (1 << 23);
 	WREG32(mmCP_CPF_DEBUG, tmp);
 
-	/* init the pipes */
+	/* init all pipes (even the ones we don't own) */
 	for (i = 0; i < adev->gfx.mec.num_mec; i++)
-		for (j = 0; j < adev->gfx.mec.num_pipe; j++)
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
 			gfx_v7_0_compute_pipe_init(adev, i, j);
 
 	/* init the queues */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 4d40f57..6588054 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1424,18 +1424,33 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
+	size_t mec_hpd_size;
 
-	/*
-	 * we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	switch (adev->asic_type) {
+	case CHIP_FIJI:
+	case CHIP_TONGA:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_POLARIS10:
+	case CHIP_CARRIZO:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	case CHIP_TOPAZ:
+	case CHIP_STONEY:
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
+
+	/* only 1 pipe of the first MEC is owned by amdgpu */
+	mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX8_MEC_HPD_SIZE;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -1464,7 +1479,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 		return r;
 	}
 
-	memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE);
+	memset(hpd, 0, mec_hpd_size);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f49c551..c064dea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -472,55 +472,10 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
 int init_pipelines(struct device_queue_manager *dqm,
 			unsigned int pipes_num, unsigned int first_pipe)
 {
-	void *hpdptr;
-	struct mqd_manager *mqd;
-	unsigned int i, err, inx;
-	uint64_t pipe_hpd_addr;
-
 	BUG_ON(!dqm || !dqm->dev);
 
 	pr_debug("kfd: In func %s\n", __func__);
 
-	/*
-	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
-	 * The driver never accesses this memory after zeroing it.
-	 * It doesn't even have to be saved/restored on suspend/resume
-	 * because it contains no data when there are no active queues.
-	 */
-
-	err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
-					&dqm->pipeline_mem);
-
-	if (err) {
-		pr_err("kfd: error allocate vidmem num pipes: %d\n",
-			pipes_num);
-		return -ENOMEM;
-	}
-
-	hpdptr = dqm->pipeline_mem->cpu_ptr;
-	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;
-
-	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);
-
-	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
-	if (mqd == NULL) {
-		kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < pipes_num; i++) {
-		inx = i + first_pipe;
-		/*
-		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
-		 * space in GTT for pipelines we don't initialize
-		 */
-		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
-		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
-		/* = log2(bytes/4)-1 */
-		dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
-				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
-	}
-
 	return 0;
 }
 
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 08/13] drm/radeon: take ownership of pipe initialization
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (6 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 07/13] drm/amdgpu: take ownership of per-pipe configuration Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 09/13] drm/amdgpu: allow split of queues with kfd at queue granularity Andres Rodriguez
                     ` (5 subsequent siblings)
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

Take ownership of pipe initialization away from KFD.

Note that hpd_eop_gpu_addr was already large enough to accommodate all
pipes.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/radeon/cik.c        | 27 ++++++++++++++-------------
 drivers/gpu/drm/radeon/radeon_kfd.c | 13 +------------
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index f6ff41a..82b57ef 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4595,23 +4595,24 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
 	/* init the pipes */
 	mutex_lock(&rdev->srbm_mutex);
 
-	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
+	for (i = 0; i < rdev->mec.num_pipe; ++i) {
+		cik_srbm_select(rdev, 0, i, 0, 0);
 
-	cik_srbm_select(rdev, 0, 0, 0, 0);
-
-	/* write the EOP addr */
-	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2) ;
+		/* write the EOP addr */
+		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
-	/* set the VMID assigned */
-	WREG32(CP_HPD_EOP_VMID, 0);
+		/* set the VMID assigned */
+		WREG32(CP_HPD_EOP_VMID, 0);
 
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	tmp = RREG32(CP_HPD_EOP_CONTROL);
-	tmp &= ~EOP_SIZE_MASK;
-	tmp |= order_base_2(MEC_HPD_SIZE / 8);
-	WREG32(CP_HPD_EOP_CONTROL, tmp);
+		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+		tmp = RREG32(CP_HPD_EOP_CONTROL);
+		tmp &= ~EOP_SIZE_MASK;
+		tmp |= order_base_2(MEC_HPD_SIZE / 8);
+		WREG32(CP_HPD_EOP_CONTROL, tmp);
 
+	}
 	mutex_unlock(&rdev->srbm_mutex);
 
 	/* init the queues.  Just two for now. */
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 87a9ebb..a06e3b1 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -423,18 +423,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-	uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-	lock_srbm(kgd, mec, pipe, 0, 0);
-	write_register(kgd, CP_HPD_EOP_BASE_ADDR,
-			lower_32_bits(hpd_gpu_addr >> 8));
-	write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
-			upper_32_bits(hpd_gpu_addr >> 8));
-	write_register(kgd, CP_HPD_EOP_VMID, 0);
-	write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
-	unlock_srbm(kgd);
-
+	/* nothing to do here */
 	return 0;
 }
 
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 09/13] drm/amdgpu: allow split of queues with kfd at queue granularity
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (7 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 08/13] drm/radeon: take ownership of pipe initialization Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 10/13] drm/amdkfd: allow HQD split on per-queue granularity Andres Rodriguez
                     ` (4 subsequent siblings)
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

Previously the queue/pipe split with kfd operated with pipe
granularity. This patch allows amdgpu to take ownership of an arbitrary
set of queues.

It also consolidates the last few magic numbers in the compute
initialization process into mec_init.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h             |  7 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c           | 83 ++++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c           | 79 ++++++++++++++++++-----
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |  1 +
 4 files changed, 133 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index eeffd00..b2fafd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -46,6 +46,8 @@
 #include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>
 
+#include <kgd_kfd_interface.h>
+
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
@@ -773,12 +775,17 @@ struct amdgpu_rlc {
 	u32 *register_restore;
 };
 
+#define AMDGPU_MAX_QUEUES KGD_MAX_QUEUES
+
 struct amdgpu_mec {
 	struct amdgpu_bo	*hpd_eop_obj;
 	u64			hpd_eop_gpu_addr;
 	u32 num_mec;
 	u32 num_pipe_per_mec;
 	u32 num_queue_per_pipe;
+
+	/* These are the resources for which amdgpu takes ownership */
+	DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_QUEUES);
 };
 
 struct amdgpu_kiq {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index cc98d71..ff5d25c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -49,7 +49,6 @@
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS     1
-#define GFX7_NUM_COMPUTE_RINGS 8
 #define GFX7_MEC_HPD_SIZE      2048
 
 
@@ -2794,18 +2793,46 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 	}
 }
 
+static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+	int i, queue, pipe, mec;
+
+	/* policy for amdgpu compute queue ownership */
+	for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
+		queue = i % adev->gfx.mec.num_queue_per_pipe;
+		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			% adev->gfx.mec.num_pipe_per_mec;
+		mec = (i / adev->gfx.mec.num_queue_per_pipe)
+			/ adev->gfx.mec.num_pipe_per_mec;
+
+		/* mec is 0-based; stop once we pass the last real MEC */
+		if (mec >= adev->gfx.mec.num_mec)
+			break;
+
+		/* policy: amdgpu owns all queues in the first pipe */
+		if (mec == 0 && pipe == 0)
+			set_bit(i, adev->gfx.mec.queue_bitmap);
+	}
+
+	/* update the number of active compute rings */
+	adev->gfx.num_compute_rings =
+		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
+
+	/* If you hit this case and edited the policy, you probably just
+	 * need to increase AMDGPU_MAX_COMPUTE_RINGS.
+	 */
+	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	size_t mec_hpd_size;
 
-	/*
-	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
-	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
-	 * Nonetheless, we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
+
 	switch (adev->asic_type) {
 	case CHIP_KAVERI:
 		adev->gfx.mec.num_mec = 2;
@@ -2821,6 +2848,10 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 	adev->gfx.mec.num_pipe_per_mec = 4;
 	adev->gfx.mec.num_queue_per_pipe = 8;
 
+	/* take ownership of the relevant compute queues */
+	gfx_v7_0_compute_queue_acquire(adev);
+
+	/* allocate space for ALL pipes (even the ones we don't own) */
 	mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
 		* GFX7_MEC_HPD_SIZE * 2;
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
@@ -4496,7 +4527,7 @@ static int gfx_v7_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
 	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
@@ -4692,7 +4723,7 @@ static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r;
+	int i, r, ring_id;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
@@ -4741,28 +4772,38 @@ static int gfx_v7_0_sw_init(void *handle)
 	}
 
 	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+	for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) {
 		unsigned irq_type;
 
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		ring = &adev->gfx.compute_ring[ring_id];
+
+		/* mec0 is me1 */
+		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+				/ adev->gfx.mec.num_pipe_per_mec)
+				+ 1;
+		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+				% adev->gfx.mec.num_pipe_per_mec;
+		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
 		ring->ring_obj = NULL;
 		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
+		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+			+ ring->pipe;
+
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, irq_type);
 		if (r)
 			return r;
+
+		ring_id++;
 	}
 
 	/* reserve GDS, GWS and OA resource for gfx */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 6588054..d18bdd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -52,7 +52,6 @@
 #include "smu/smu_7_1_3_d.h"
 
 #define GFX8_NUM_GFX_RINGS     1
-#define GFX8_NUM_COMPUTE_RINGS 8
 #define GFX8_MEC_HPD_SIZE 2048
 
 
@@ -1420,12 +1419,45 @@ static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
 
 #define GFX8_MEC_HPD_SIZE 2048
 
+static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+	int i, queue, pipe, mec;
+
+	/* policy for amdgpu compute queue ownership */
+	for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
+		queue = i % adev->gfx.mec.num_queue_per_pipe;
+		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			% adev->gfx.mec.num_pipe_per_mec;
+		mec = (i / adev->gfx.mec.num_queue_per_pipe)
+			/ adev->gfx.mec.num_pipe_per_mec;
+
+		/* mec is 0-based; stop once we pass the last real MEC */
+		if (mec >= adev->gfx.mec.num_mec)
+			break;
+
+		/* policy: amdgpu owns all queues in the first pipe */
+		if (mec == 0 && pipe == 0)
+			set_bit(i, adev->gfx.mec.queue_bitmap);
+	}
+
+	/* update the number of active compute rings */
+	adev->gfx.num_compute_rings =
+		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
+
+	/* If you hit this case and edited the policy, you probably just
+	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	size_t mec_hpd_size;
 
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_QUEUES);
+
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
 	case CHIP_TONGA:
@@ -1445,8 +1477,10 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 	adev->gfx.mec.num_pipe_per_mec = 4;
 	adev->gfx.mec.num_queue_per_pipe = 8;
 
-	/* only 1 pipe of the first MEC is owned by amdgpu */
-	mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX8_MEC_HPD_SIZE;
+	/* take ownership of the relevant compute queues */
+	gfx_v8_0_compute_queue_acquire(adev);
+
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
@@ -2090,7 +2124,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 
 static int gfx_v8_0_sw_init(void *handle)
 {
-	int i, r;
+	int i, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2166,28 +2200,41 @@ static int gfx_v8_0_sw_init(void *handle)
 	}
 
 	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+	for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) {
 		unsigned irq_type;
 
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
 			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
+
+		ring = &adev->gfx.compute_ring[ring_id];
+
+		/* mec0 is me1 */
+		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+				/ adev->gfx.mec.num_pipe_per_mec)
+				+ 1;
+		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+				% adev->gfx.mec.num_pipe_per_mec;
+		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
 		ring->ring_obj = NULL;
 		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
+		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+			+ ring->pipe;
+
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 				     irq_type);
 		if (r)
 			return r;
+
+		ring_id++;
 	}
 
 	/* reserve GDS, GWS and OA resource for gfx */
@@ -5692,7 +5739,7 @@ static int gfx_v8_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index a09d9f3..67f6d19 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -33,6 +33,7 @@
 struct pci_dev;
 
 #define KFD_INTERFACE_VERSION 1
+#define KGD_MAX_QUEUES 128
 
 struct kfd_dev;
 struct kgd_dev;
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 10/13] drm/amdkfd: allow HQD split on per-queue granularity
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (8 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 09/13] drm/amdgpu: allow split of queues with kfd at queue granularity Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 11/13] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c Andres Rodriguez
                     ` (3 subsequent siblings)
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity.

This allows for more interesting pipe/queue splits.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c         |  22 ++++-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            |   4 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 100 ++++++++++++++-------
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  10 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    |   3 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h    |  17 ++--
 drivers/gpu/drm/radeon/radeon_kfd.c                |  21 ++++-
 9 files changed, 126 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3200ff9..8fc5aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -95,14 +95,30 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
+	int i;
+	int last_valid_bit;
 	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-
-			.first_compute_pipe = 1,
-			.compute_pipe_count = 4 - 1,
+			.num_mec = adev->gfx.mec.num_mec,
+			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
 		};
 
+		/* this is going to have a few of the MSBs set that we need to
+		 * clear */
+		bitmap_complement(gpu_resources.queue_bitmap,
+				  adev->gfx.mec.queue_bitmap,
+				  KGD_MAX_QUEUES);
+
+		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
+		 * nbits is not compile time constant */
+		last_valid_bit = adev->gfx.mec.num_mec
+				* adev->gfx.mec.num_pipe_per_mec
+				* adev->gfx.mec.num_queue_per_pipe;
+		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+			clear_bit(i, gpu_resources.queue_bitmap);
+
 		amdgpu_doorbell_get_kfd_info(adev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7c..88187bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -226,6 +226,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	kfd->shared_resources = *gpu_resources;
 
+	/* We only use the first MEC */
+	if (kfd->shared_resources.num_mec > 1)
+		kfd->shared_resources.num_mec = 1;
+
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c064dea..5321d18 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -63,21 +63,44 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 	return KFD_MQD_TYPE_CP;
 }
 
-unsigned int get_first_pipe(struct device_queue_manager *dqm)
+static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
+{
+	int i;
+	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
+		+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
+
+	/* queue is available for KFD usage if bit is 1 */
+	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
+		if (test_bit(pipe_offset + i,
+			      dqm->dev->shared_resources.queue_bitmap))
+			return true;
+	return false;
+}
+
+unsigned int get_mec_num(struct device_queue_manager *dqm)
+{
+	BUG_ON(!dqm || !dqm->dev);
+
+	return dqm->dev->shared_resources.num_mec;
+}
+
+unsigned int get_queues_num(struct device_queue_manager *dqm)
 {
 	BUG_ON(!dqm || !dqm->dev);
-	return dqm->dev->shared_resources.first_compute_pipe;
+	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
+				KGD_MAX_QUEUES);
 }
 
-unsigned int get_pipes_num(struct device_queue_manager *dqm)
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
 {
 	BUG_ON(!dqm || !dqm->dev);
-	return dqm->dev->shared_resources.compute_pipe_count;
+	return dqm->dev->shared_resources.num_queue_per_pipe;
 }
 
-static inline unsigned int get_pipes_num_cpsch(void)
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
 {
-	return PIPE_PER_ME_CP_SCHEDULING;
+	BUG_ON(!dqm || !dqm->dev);
+	return dqm->dev->shared_resources.num_pipe_per_mec;
 }
 
 void program_sh_mem_settings(struct device_queue_manager *dqm,
@@ -200,12 +223,16 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
 
 	set = false;
 
-	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
-			pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
+	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm);
+			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
+
+		if (!is_pipe_enabled(dqm, 0, pipe))
+			continue;
+
 		if (dqm->allocated_queues[pipe] != 0) {
 			bit = find_first_bit(
 				(unsigned long *)&dqm->allocated_queues[pipe],
-				QUEUES_PER_PIPE);
+				get_queues_per_pipe(dqm));
 
 			clear_bit(bit,
 				(unsigned long *)&dqm->allocated_queues[pipe]);
@@ -222,7 +249,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
 	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
 				__func__, q->pipe, q->queue);
 	/* horizontal hqd allocation */
-	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);
+	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
 
 	return 0;
 }
@@ -469,36 +496,25 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
 						vmid);
 }
 
-int init_pipelines(struct device_queue_manager *dqm,
-			unsigned int pipes_num, unsigned int first_pipe)
-{
-	BUG_ON(!dqm || !dqm->dev);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
-	return 0;
-}
-
 static void init_interrupts(struct device_queue_manager *dqm)
 {
 	unsigned int i;
 
 	BUG_ON(dqm == NULL);
 
-	for (i = 0 ; i < get_pipes_num(dqm) ; i++)
-		dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
-				i + get_first_pipe(dqm));
+	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
+		if (is_pipe_enabled(dqm, 0, i))
+			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
 }
 
 static int init_scheduler(struct device_queue_manager *dqm)
 {
-	int retval;
+	int retval = 0;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In %s\n", __func__);
 
-	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
 	return retval;
 }
 
@@ -509,21 +525,21 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In func %s num of pipes: %d\n",
-			__func__, get_pipes_num(dqm));
+			__func__, get_pipes_per_mec(dqm));
 
 	mutex_init(&dqm->lock);
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
 	dqm->sdma_queue_count = 0;
-	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
+	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
 					sizeof(unsigned int), GFP_KERNEL);
 	if (!dqm->allocated_queues) {
 		mutex_destroy(&dqm->lock);
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < get_pipes_num(dqm); i++)
-		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
+	for (i = 0; i < get_pipes_per_mec(dqm); i++)
+		dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;
 
 	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
 	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
@@ -630,18 +646,34 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 
 static int set_sched_resources(struct device_queue_manager *dqm)
 {
+	int i;
 	struct scheduling_resources res;
-	unsigned int queue_num, queue_mask;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In func %s\n", __func__);
 
-	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
-	queue_mask = (1 << queue_num) - 1;
 	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
 	res.vmid_mask <<= KFD_VMID_START_OFFSET;
-	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
+
+	/* Avoid touching the internal representation queue_bitmap directly.
+	 * Even though doing a simple memcpy might sound tempting, it would
+	 * silently break if the implementation of bitmaps is changed */
+	res.queue_mask = 0;
+	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of res.queue_mask needs updating */
+		if (WARN_ON(i > sizeof(res.queue_mask))) {
+			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
+			break;
+		}
+
+		res.queue_mask |= 1 << i;
+	}
 	res.gws_mask = res.oac_mask = res.gds_heap_base =
 						res.gds_heap_size = 0;
 
@@ -660,7 +692,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In func %s num of pipes: %d\n",
-			__func__, get_pipes_num_cpsch());
+			__func__, get_pipes_per_mec(dqm));
 
 	mutex_init(&dqm->lock);
 	INIT_LIST_HEAD(&dqm->queues);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index a625b91..66b9615 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -30,8 +30,6 @@
 #include "kfd_mqd_manager.h"
 
 #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS	(500)
-#define QUEUES_PER_PIPE				(8)
-#define PIPE_PER_ME_CP_SCHEDULING		(3)
 #define CIK_VMID_NUM				(8)
 #define KFD_VMID_START_OFFSET			(8)
 #define VMID_PER_DEVICE				CIK_VMID_NUM
@@ -182,10 +180,10 @@ void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
 void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
-int init_pipelines(struct device_queue_manager *dqm,
-		unsigned int pipes_num, unsigned int first_pipe);
-unsigned int get_first_pipe(struct device_queue_manager *dqm);
-unsigned int get_pipes_num(struct device_queue_manager *dqm);
+unsigned int get_mec_num(struct device_queue_manager *dqm);
+unsigned int get_queues_num(struct device_queue_manager *dqm);
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
 
 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index c6f435a..48dc056 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -151,5 +151,5 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 
 static int initialize_cpsch_cik(struct device_queue_manager *dqm)
 {
-	return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index ca8c093..7131998 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -65,8 +65,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 
 	/* check if there is over subscription*/
 	*over_subscription = false;
-	if ((process_count > 1) ||
-		queue_count > PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+	if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
 		*over_subscription = true;
 		pr_debug("kfd: over subscribed runlist\n");
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index e1fb40b..32cdf2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -209,7 +209,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		/* check if there is over subscription */
 		if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
 		((dev->dqm->processes_count >= VMID_PER_DEVICE) ||
-		(dev->dqm->queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE))) {
+		(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
 			pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
 			retval = -EPERM;
 			goto err_create_queue;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 67f6d19..91ef148 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -29,10 +29,11 @@
 #define KGD_KFD_INTERFACE_H_INCLUDED
 
 #include <linux/types.h>
+#include <linux/bitmap.h>
 
 struct pci_dev;
 
-#define KFD_INTERFACE_VERSION 1
+#define KFD_INTERFACE_VERSION 2
 #define KGD_MAX_QUEUES 128
 
 struct kfd_dev;
@@ -62,11 +63,17 @@ struct kgd2kfd_shared_resources {
 	/* Bit n == 1 means VMID n is available for KFD. */
 	unsigned int compute_vmid_bitmap;
 
-	/* Compute pipes are counted starting from MEC0/pipe0 as 0. */
-	unsigned int first_compute_pipe;
+	/* number of mec available from the hardware */
+	uint32_t num_mec;
 
-	/* Number of MEC pipes available for KFD. */
-	unsigned int compute_pipe_count;
+	/* number of pipes per mec */
+	uint32_t num_pipe_per_mec;
+
+	/* number of queues per pipe */
+	uint32_t num_queue_per_pipe;
+
+	/* Bit n == 1 means Queue n is available for KFD */
+	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
 
 	/* Base address of doorbell aperture. */
 	phys_addr_t doorbell_physical_address;
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index a06e3b1..699fe7f 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -179,14 +179,29 @@ void radeon_kfd_device_probe(struct radeon_device *rdev)
 
 void radeon_kfd_device_init(struct radeon_device *rdev)
 {
+	int i, queue, pipe, mec;
+
 	if (rdev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-
-			.first_compute_pipe = 1,
-			.compute_pipe_count = 4 - 1,
+			.num_mec = 1,
+			.num_pipe_per_mec = 4,
+			.num_queue_per_pipe = 8
 		};
 
+		bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES);
+
+		for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+			queue = i % gpu_resources.num_queue_per_pipe;
+			pipe = (i / gpu_resources.num_queue_per_pipe)
+				% gpu_resources.num_pipe_per_mec;
+			mec = (i / gpu_resources.num_queue_per_pipe)
+				/ gpu_resources.num_pipe_per_mec;
+
+			if (mec == 0 && pipe > 0)
+				set_bit(i, gpu_resources.queue_bitmap);
+		}
+
 		radeon_doorbell_get_kfd_info(rdev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 11/13] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (9 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 10/13] drm/amdkfd: allow HQD split on per-queue granularity Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 12/13] drm/amdgpu: allocate queues horizontally across pipes Andres Rodriguez
                     ` (2 subsequent siblings)
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

This information is already available in adev.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 12 ++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 910f9d3..5254562 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -39,8 +39,6 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
-#define CIK_PIPE_PER_MEC	(4)
-
 enum {
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_WATCH_ADDRESSES = 4
@@ -186,8 +184,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 {
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
@@ -254,8 +254,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 5843368..db7410a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -39,8 +39,6 @@
 #include "vi_structs.h"
 #include "vid.h"
 
-#define VI_PIPE_PER_MEC	(4)
-
 struct cik_sdma_rlc_registers;
 
 /*
@@ -147,8 +145,10 @@ static void unlock_srbm(struct kgd_dev *kgd)
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 {
-	uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
@@ -216,8 +216,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % VI_PIPE_PER_MEC);
+	mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 12/13] drm/amdgpu: allocate queues horizontally across pipes
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (10 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 11/13] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
  2017-02-04  4:51   ` [PATCH 13/13] drm/amdgpu: new queue policy, take first 2 queues of each pipe Andres Rodriguez
  2017-02-06 20:20   ` Change queue/pipe split between amdkfd and amdgpu Felix Kuehling
  13 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

Pipes provide better concurrency than queues, therefore we want to make
sure that apps use queues from different pipes whenever possible.

Optimize for the trivial case where an app will consume rings in order,
therefore we don't want adjacent rings to belong to the same pipe.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 78 +++++++++++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 83 +++++++++++++++++++++--------------
 3 files changed, 109 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b2fafd6..11112f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1644,6 +1644,19 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 		return NULL;
 }
 
+static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
+						int mec, int pipe, int queue)
+{
+	int bit = 0;
+
+	bit += mec * adev->gfx.mec.num_pipe_per_mec
+		* adev->gfx.mec.num_queue_per_pipe;
+	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+	bit += queue;
+
+	return test_bit(bit, adev->gfx.mec.queue_bitmap);
+}
+
 /*
  * ASICs macro.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index ff5d25c..2218b65 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4719,11 +4719,42 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			&adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r, ring_id;
+	int i, j, k, r, ring_id;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
@@ -4771,39 +4802,24 @@ static int gfx_v7_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) {
-		unsigned irq_type;
-
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		ring = &adev->gfx.compute_ring[ring_id];
-
-		/* mec0 is me1 */
-		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-				/ adev->gfx.mec.num_pipe_per_mec)
-				+ 1;
-		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-				% adev->gfx.mec.num_pipe_per_mec;
-		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
 
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-			+ ring->pipe;
+				if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+					continue;
 
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, irq_type);
-		if (r)
-			return r;
+				r = gfx_v7_0_compute_ring_init(adev,
+								ring_id,
+								i, k, j);
+				if (r)
+					return r;
 
-		ring_id++;
+				ring_id++;
+			}
+		}
 	}
 
 	/* reserve GDS, GWS and OA resource for gfx */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index d18bdd4..04b4448 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2122,9 +2122,42 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			&adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v8_0_sw_init(void *handle)
 {
-	int i, r, ring_id;
+	int i, j, k, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -2199,42 +2232,24 @@ static int gfx_v8_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0, ring_id = 0; i < AMDGPU_MAX_QUEUES; i++) {
-		unsigned irq_type;
-
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
-			break;
-
-		ring = &adev->gfx.compute_ring[ring_id];
-
-		/* mec0 is me1 */
-		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-				/ adev->gfx.mec.num_pipe_per_mec)
-				+ 1;
-		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-				% adev->gfx.mec.num_pipe_per_mec;
-		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
 
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-			+ ring->pipe;
+				if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+					continue;
 
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
-				     irq_type);
-		if (r)
-			return r;
+				r = gfx_v8_0_compute_ring_init(adev,
+								ring_id,
+								i, k, j);
+				if (r)
+					return r;
 
-		ring_id++;
+				ring_id++;
+			}
+		}
 	}
 
 	/* reserve GDS, GWS and OA resource for gfx */
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 13/13] drm/amdgpu: new queue policy, take first 2 queues of each pipe
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (11 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 12/13] drm/amdgpu: allocate queues horizontally across pipes Andres Rodriguez
@ 2017-02-04  4:51   ` Andres Rodriguez
       [not found]     ` <20170204045142.5596-14-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-02-06 20:20   ` Change queue/pipe split between amdkfd and amdgpu Felix Kuehling
  13 siblings, 1 reply; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-04  4:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, Andres Rodriguez,
	John.Bridgman-5C7GfCeVMHo

Instead of taking the first pipe and giving the rest to kfd, take the
first 2 queues of each pipe.

Effectively, amdgpu and amdkfd own the same number of queues. But
because the queues are spread over multiple pipes the hardware will be
able to better handle concurrent compute workloads.

amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute thread to 4
amdkfd goes from 3 pipes to 4 pipes, i.e. from 3 compute threads to 4

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 2218b65..da28174 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2810,7 +2810,7 @@ static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
 			break;
 
 		/* policy: amdgpu owns all queues in the first pipe */
-		if (mec == 0 && pipe == 0)
+		if (mec == 0 && queue < 2)
 			set_bit(i, adev->gfx.mec.queue_bitmap);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 04b4448..0a16cab 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1436,7 +1436,7 @@ static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
 			break;
 
 		/* policy: amdgpu owns all queues in the first pipe */
-		if (mec == 0 && pipe == 0)
+		if (mec == 0 && queue < 2)
 			set_bit(i, adev->gfx.mec.queue_bitmap);
 	}
 
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 13/13] drm/amdgpu: new queue policy, take first 2 queues of each pipe
       [not found]     ` <20170204045142.5596-14-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-02-04 12:08       ` Edward O'Callaghan
       [not found]         ` <86138a88-e90f-3234-7109-67ca0c427071-dczkZgxz+BNUPWh3PAxdjQ@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Edward O'Callaghan @ 2017-02-04 12:08 UTC (permalink / raw)
  To: Andres Rodriguez, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo


[-- Attachment #1.1.1: Type: text/plain, Size: 1933 bytes --]

This series is,
Reviewed-by: Edward O'Callaghan <funfunctor-dczkZgxz+BNUPWh3PAxdjQ@public.gmane.org>

On 02/04/2017 03:51 PM, Andres Rodriguez wrote:
> Instead of taking the first pipe and givint the rest to kfd, take the

s/givint/giving/

> first 2 queues of each pipe.
> 
> Effectively, amdgpu and amdkfd own the same number of queues. But
> because the queues are spread over multiple pipes the hardware will be
> able to better handle concurrent compute workloads.
> 
> amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute threads to 4
> amdkfd goes from 3 pipe to 4 pipes, i.e. from 3 compute threads to 4
> 
> Signed-off-by: Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 2218b65..da28174 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -2810,7 +2810,7 @@ static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
>  			break;
>  
>  		/* policy: amdgpu owns all queues in the first pipe */
> -		if (mec == 0 && pipe == 0)
> +		if (mec == 0 && queue < 2)
>  			set_bit(i, adev->gfx.mec.queue_bitmap);
>  	}
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 04b4448..0a16cab 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -1436,7 +1436,7 @@ static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
>  			break;
>  
>  		/* policy: amdgpu owns all queues in the first pipe */
> -		if (mec == 0 && pipe == 0)
> +		if (mec == 0 && queue < 2)
>  			set_bit(i, adev->gfx.mec.queue_bitmap);
>  	}
>  
> 


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 02/13] drm/amdgpu: doorbell registers need only be set once
       [not found]     ` <20170204045142.5596-3-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-02-06  8:31       ` Christian König
       [not found]         ` <7d85c562-5227-1fef-7b99-1f7543e6e69b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Christian König @ 2017-02-06  8:31 UTC (permalink / raw)
  To: Andres Rodriguez, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo

Am 04.02.2017 um 05:51 schrieb Andres Rodriguez:
> The CP_MEC_DOORBELL_RANGE_* and CP_PQ_STATUS.DOORBELL_ENABLE registers
> are not HQD specific.
>
> They only need to be set once if at least 1 pipe requested doorbell
> support.
>
> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +++++-
>   2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 147ce0e..9740800 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1427,6 +1427,7 @@ struct amdgpu_device {
>   	unsigned			num_rings;
>   	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
>   	bool				ib_pool_ready;
> +	bool				doorbell_enabled;

Better put that into amdgpu_gfx and not amdgpu_device, cause that is a 
gfx (CP) specific state.

Apart from that the patch looks good to me.

Christian.

>   	struct amdgpu_sa_manager	ring_tmp_bo;
>   
>   	/* interrupts */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index cf738e5..5d0e2c8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -4796,7 +4796,7 @@ static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
>   {
>   	uint32_t tmp;
>   
> -	if (!enable)
> +	if (!enable || adev->doorbell_enabled)
>   		return;
>   
>   	if ((adev->asic_type == CHIP_CARRIZO) ||
> @@ -4811,6 +4811,8 @@ static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
>   	tmp = RREG32(mmCP_PQ_STATUS);
>   	tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
>   	WREG32(mmCP_PQ_STATUS, tmp);
> +
> +	adev->doorbell_enabled = true;
>   }
>   
>   static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
> @@ -5108,6 +5110,8 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
>   {
>   	int r;
>   
> +	adev->doorbell_enabled = false;
> +
>   	if (!(adev->flags & AMD_IS_APU))
>   		gfx_v8_0_enable_gui_idle_interrupt(adev, false);
>   


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 13/13] drm/amdgpu: new queue policy, take first 2 queues of each pipe
       [not found]         ` <86138a88-e90f-3234-7109-67ca0c427071-dczkZgxz+BNUPWh3PAxdjQ@public.gmane.org>
@ 2017-02-06  8:35           ` Christian König
  0 siblings, 0 replies; 31+ messages in thread
From: Christian König @ 2017-02-06  8:35 UTC (permalink / raw)
  To: Edward O'Callaghan, Andres Rodriguez,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo


[-- Attachment #1.1: Type: text/plain, Size: 2412 bytes --]

Indeed a very nice set, only briefly looked over it and only found a 
small issue on patch #2.

Apart from that the set is Acked-by: Christian König 
<christian.koenig-5C7GfCeVMHo@public.gmane.org>.

Regards,
Christian.

Am 04.02.2017 um 13:08 schrieb Edward O'Callaghan:
> This series is,
> Reviewed-by: Edward O'Callaghan <funfunctor-dczkZgxz+BNUPWh3PAxdjQ@public.gmane.org>
>
> On 02/04/2017 03:51 PM, Andres Rodriguez wrote:
>> Instead of taking the first pipe and givint the rest to kfd, take the
> s/givint/giving/
>
>> first 2 queues of each pipe.
>>
>> Effectively, amdgpu and amdkfd own the same number of queues. But
>> because the queues are spread over multiple pipes the hardware will be
>> able to better handle concurrent compute workloads.
>>
>> amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute threads to 4
>> amdkfd goes from 3 pipe to 4 pipes, i.e. from 3 compute threads to 4
>>
>> Signed-off-by: Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +-
>>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +-
>>   2 files changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> index 2218b65..da28174 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
>> @@ -2810,7 +2810,7 @@ static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
>>   			break;
>>   
>>   		/* policy: amdgpu owns all queues in the first pipe */
>> -		if (mec == 0 && pipe == 0)
>> +		if (mec == 0 && queue < 2)
>>   			set_bit(i, adev->gfx.mec.queue_bitmap);
>>   	}
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 04b4448..0a16cab 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -1436,7 +1436,7 @@ static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
>>   			break;
>>   
>>   		/* policy: amdgpu owns all queues in the first pipe */
>> -		if (mec == 0 && pipe == 0)
>> +		if (mec == 0 && queue < 2)
>>   			set_bit(i, adev->gfx.mec.queue_bitmap);
>>   	}
>>   
>>
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[-- Attachment #1.2: Type: text/html, Size: 3618 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 05/13] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu
       [not found]     ` <20170204045142.5596-6-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-02-06 20:16       ` Felix Kuehling
  0 siblings, 0 replies; 31+ messages in thread
From: Felix Kuehling @ 2017-02-06 20:16 UTC (permalink / raw)
  To: Andres Rodriguez, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	John.Bridgman-5C7GfCeVMHo


[-- Attachment #1.1: Type: text/plain, Size: 14311 bytes --]

In the current KFD branch, we changed our equivalent of the MQD commit
function to copy most of the MQD registers in a loop and just update a
few registers manually to get the right programming sequence and work
around some HW errata. We were told that this is what the HW scheduler
does as well. You can see this on github:

    https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/roc-1.4.x/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c#L364

Regards,
  Felix

On 17-02-03 11:51 PM, Andres Rodriguez wrote:
> Use the same gfx_*_mqd_commit function for kfd and amdgpu codepaths.
>
> This removes the last duplicates of this programming sequence.
>
> Signed-off-by: Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 51 ++---------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 49 ++--------------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c             | 38 ++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h             |  5 +++
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c             | 44 ++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h             |  5 +++
>  6 files changed, 97 insertions(+), 95 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> index 1a0a5f7..038b7ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
> @@ -29,6 +29,7 @@
>  #include "cikd.h"
>  #include "cik_sdma.h"
>  #include "amdgpu_ucode.h"
> +#include "gfx_v7_0.h"
>  #include "gca/gfx_7_2_d.h"
>  #include "gca/gfx_7_2_enum.h"
>  #include "gca/gfx_7_2_sh_mask.h"
> @@ -309,55 +310,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>  	m = get_mqd(mqd);
>  
>  	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
> -
> -	acquire_queue(kgd, pipe_id, queue_id);
> -	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
> -	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
> -	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
> -
> -	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
> -	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
> -	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
> -
> -	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
> -	WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
> -	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
> -
> -	WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
> -
> -	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
> -	WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
> -	WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
> -
> -	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
> -	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
> -	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
> -	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
> -
> -	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
> -	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
> -			m->cp_hqd_pq_rptr_report_addr_hi);
> -
> -	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
> -
> -	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
> -	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
> -
> -	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
> -
> -	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
> -
> -	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
> -
> -	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
> -	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
> -
> -	WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
> -
>  	if (is_wptr_shadow_valid)
> -		WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
> +		m->cp_hqd_pq_wptr = wptr_shadow;
>  
> -	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
> +	acquire_queue(kgd, pipe_id, queue_id);
> +	gfx_v7_0_mqd_commit(adev, m);
>  	release_queue(kgd);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> index 6697612..2ecef3d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
> @@ -28,6 +28,7 @@
>  #include "amdgpu.h"
>  #include "amdgpu_amdkfd.h"
>  #include "amdgpu_ucode.h"
> +#include "gfx_v8_0.h"
>  #include "gca/gfx_8_0_sh_mask.h"
>  #include "gca/gfx_8_0_d.h"
>  #include "gca/gfx_8_0_enum.h"
> @@ -251,53 +252,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
>  	m = get_mqd(mqd);
>  
>  	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
> -	acquire_queue(kgd, pipe_id, queue_id);
> -
> -	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
> -	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
> -	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
> -
> -	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
> -	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
> -	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
> -	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
> -	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
> -	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
> -	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
> -	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
> -	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
> -			m->cp_hqd_pq_rptr_report_addr_hi);
> -
>  	if (valid_wptr > 0)
> -		WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
> -
> -	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
> -	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
> -
> -	WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
> -	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
> -	WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
> -	WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
> -	WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
> -	WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
> -
> -	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
> -	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
> -	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
> -	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
> -	WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
> -	WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
> -	WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
> -
> -	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
> -
> -	WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
> -	WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
> -	WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
> -	WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
> -
> -	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
> +		m->cp_hqd_pq_wptr = valid_wptr;
>  
> +	acquire_queue(kgd, pipe_id, queue_id);
> +	gfx_v8_0_mqd_commit(adev, mqd);
>  	release_queue(kgd);
>  
>  	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 4a279bb..d226804 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -3038,12 +3038,29 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
>  	/* set the vmid for the queue */
>  	mqd->cp_hqd_vmid = 0;
>  
> +	/* defaults */
> +	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
> +	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
> +	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
> +	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
> +	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
> +	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
> +	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
> +	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
> +	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
> +	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
> +	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
> +	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
> +	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
> +	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
> +	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
> +	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
> +
>  	/* activate the queue */
>  	mqd->cp_hqd_active = 1;
>  }
>  
> -static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
> -			       struct cik_mqd *mqd)
> +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
>  {
>  	u32 tmp;
>  
> @@ -3067,6 +3084,23 @@ static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
>  	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
>  	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
>  
> +	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
> +	WREG32(mmCP_HQD_IB_BASE_ADDR, mqd->cp_hqd_ib_base_addr_lo);
> +	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, mqd->cp_hqd_ib_base_addr_hi);
> +	WREG32(mmCP_HQD_IB_RPTR, mqd->cp_hqd_ib_rptr);
> +	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
> +	WREG32(mmCP_HQD_SEMA_CMD, mqd->cp_hqd_sema_cmd);
> +	WREG32(mmCP_HQD_MSG_TYPE, mqd->cp_hqd_msg_type);
> +	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, mqd->cp_hqd_atomic0_preop_lo);
> +	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, mqd->cp_hqd_atomic0_preop_hi);
> +	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, mqd->cp_hqd_atomic1_preop_lo);
> +	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, mqd->cp_hqd_atomic1_preop_hi);
> +	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
> +	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
> +	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
> +	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
> +	WREG32(mmCP_HQD_IQ_RPTR, mqd->cp_hqd_iq_rptr);
> +
>  	/* activate the HQD */
>  	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
> index 2f5164c..6fb9c15 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
> @@ -29,4 +29,9 @@ extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;
>  extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;
>  extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block;
>  
> +struct amdgpu_device;
> +struct cik_mqd;
> +
> +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd);
> +
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 442cd66..7755d58 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -4762,6 +4762,26 @@ static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
>  	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
>  	mqd->cp_hqd_persistent_state = tmp;
>  
> +	/* defaults */
> +	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
> +	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
> +	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
> +	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
> +	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
> +	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
> +	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
> +	mqd->cp_hqd_ctx_save_control = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
> +	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
> +	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
> +	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
> +	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
> +	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
> +	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
> +	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
> +	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
> +	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
> +
> +
>  	/* activate the queue */
>  	mqd->cp_hqd_active = 1;
>  
> @@ -4815,7 +4835,7 @@ static void gfx_v8_0_enable_doorbell(struct amdgpu_device *adev, bool enable)
>  	adev->doorbell_enabled = true;
>  }
>  
> -static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
> +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
>  {
>  	uint32_t tmp;
>  
> @@ -4867,6 +4887,28 @@ static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd)
>  
>  	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
>  	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
> +	WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
> +	WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
> +
> +	/* set the HQD priority */
> +	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
> +	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
> +	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
> +
> +	/* set cwsr save area */
> +	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, mqd->cp_hqd_ctx_save_base_addr_lo);
> +	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, mqd->cp_hqd_ctx_save_base_addr_hi);
> +	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, mqd->cp_hqd_ctx_save_control);
> +	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, mqd->cp_hqd_cntl_stack_offset);
> +	WREG32(mmCP_HQD_CNTL_STACK_SIZE, mqd->cp_hqd_cntl_stack_size);
> +	WREG32(mmCP_HQD_WG_STATE_OFFSET, mqd->cp_hqd_wg_state_offset);
> +	WREG32(mmCP_HQD_CTX_SAVE_SIZE, mqd->cp_hqd_ctx_save_size);
> +
> +	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
> +	WREG32(mmCP_HQD_EOP_EVENTS, mqd->cp_hqd_eop_done_events);
> +	WREG32(mmCP_HQD_ERROR, mqd->cp_hqd_error);
> +	WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
> +	WREG32(mmCP_HQD_EOP_DONES, mqd->cp_hqd_eop_dones);
>  
>  	/* set the vmid for the queue */
>  	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
> index 788cc3a..ec3f11f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
> @@ -27,4 +27,9 @@
>  extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block;
>  extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block;
>  
> +struct amdgpu_device;
> +struct vi_mqd;
> +
> +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd);
> +
>  #endif


[-- Attachment #1.2: Type: text/html, Size: 14890 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (12 preceding siblings ...)
  2017-02-04  4:51   ` [PATCH 13/13] drm/amdgpu: new queue policy, take first 2 queues of each pipe Andres Rodriguez
@ 2017-02-06 20:20   ` Felix Kuehling
       [not found]     ` <206b40fe-b958-8a78-623b-011712dc5ecc-5C7GfCeVMHo@public.gmane.org>
  13 siblings, 1 reply; 31+ messages in thread
From: Felix Kuehling @ 2017-02-06 20:20 UTC (permalink / raw)
  To: Andres Rodriguez, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	John.Bridgman-5C7GfCeVMHo

Hi Andres,

Thank you for tackling this task. It's more involved than I expected,
mostly because I didn't have much awareness of the MQD management in amdgpu.

I made one comment in a separate message about the unified MQD commit
function, if you want to bring that more in line with our latest ROCm
release on github.

Also, were you able to test the upstream KFD with your changes on a
Kaveri or Carrizo?

Regards,
  Felix


On 17-02-03 11:51 PM, Andres Rodriguez wrote:
> The current queue/pipe split policy is for amdgpu to take the first pipe of
> MEC0 and leave the rest for amdkfd to use. This policy is taken as an
> assumption in a few areas of the implementation.
>
> This patch series aims to allow for flexible/tunable queue/pipe split policies
> between kgd and kfd. It also updates the queue/pipe split policy to one that 
> allows better compute app concurrency for both drivers.
>
> In the process some duplicate code and hardcoded constants were removed.
>
> Any suggestions or feedback on improvements welcome.
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]     ` <206b40fe-b958-8a78-623b-011712dc5ecc-5C7GfCeVMHo@public.gmane.org>
@ 2017-02-08 16:23       ` Andres Rodriguez
       [not found]         ` <852b8cfe-d886-e78e-de6c-1641b107ed8f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-08 16:23 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	John.Bridgman-5C7GfCeVMHo

Hey Felix,

Thanks for the pointer to the ROCm mqd commit. I like that the 
workarounds are easy to spot. I'll add that to a new patch series I'm 
working on for some bug-fixes for perf being lower on pipes other than 
pipe 0.

I haven't tested this yet on kaveri/carrizo. I'm hoping someone with the 
HW will be able to give it a go. I put in a few small hacks to get KFD 
to boot but do nothing on polaris10.

Regards,
Andres

On 2017-02-06 03:20 PM, Felix Kuehling wrote:
> Hi Andres,
>
> Thank you for tackling this task. It's more involved than I expected,
> mostly because I didn't have much awareness of the MQD management in amdgpu.
>
> I made one comment in a separate message about the unified MQD commit
> function, if you want to bring that more in line with our latest ROCm
> release on github.
>
> Also, were you able to test the upstream KFD with your changes on a
> Kaveri or Carrizo?
>
> Regards,
>   Felix
>
>
> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>> The current queue/pipe split policy is for amdgpu to take the first pipe of
>> MEC0 and leave the rest for amdkfd to use. This policy is taken as an
>> assumption in a few areas of the implementation.
>>
>> This patch series aims to allow for flexible/tunable queue/pipe split policies
>> between kgd and kfd. It also updates the queue/pipe split policy to one that
>> allows better compute app concurrency for both drivers.
>>
>> In the process some duplicate code and hardcoded constants were removed.
>>
>> Any suggestions or feedback on improvements welcome.
>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 02/13] drm/amdgpu: doorbell registers need only be set once
       [not found]         ` <7d85c562-5227-1fef-7b99-1f7543e6e69b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-02-08 16:25           ` Andres Rodriguez
  0 siblings, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-08 16:25 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Alexander.Deucher-5C7GfCeVMHo, jay-gJmSnxjMpeIFV7jr3Ov9Ew,
	Felix.Kuehling-5C7GfCeVMHo, John.Bridgman-5C7GfCeVMHo



On 2017-02-06 03:31 AM, Christian König wrote:
> Am 04.02.2017 um 05:51 schrieb Andres Rodriguez:
>> The CP_MEC_DOORBELL_RANGE_* and CP_PQ_STATUS.DOORBELL_ENABLE registers
>> are not HQD specific.
>>
>> They only need to be set once if at least 1 pipe requested doorbell
>> support.
>>
>> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 1 +
>>   drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +++++-
>>   2 files changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 147ce0e..9740800 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -1427,6 +1427,7 @@ struct amdgpu_device {
>>       unsigned            num_rings;
>>       struct amdgpu_ring        *rings[AMDGPU_MAX_RINGS];
>>       bool                ib_pool_ready;
>> +    bool                doorbell_enabled;
>
> Better put that into amdgpu_gfx and not amdgpu_device, cause that is a
> gfx (CP) specific state.
>
> Apart from that the patch looks good to me.
>
> Christian.

Thanks for the review Christian.

If you don't mind I'll fix this in a followup that also includes a bit 
of changes to interrupt management.

Andres

>
>>       struct amdgpu_sa_manager    ring_tmp_bo;
>>         /* interrupts */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index cf738e5..5d0e2c8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -4796,7 +4796,7 @@ static void gfx_v8_0_enable_doorbell(struct
>> amdgpu_device *adev, bool enable)
>>   {
>>       uint32_t tmp;
>>   -    if (!enable)
>> +    if (!enable || adev->doorbell_enabled)
>>           return;
>>         if ((adev->asic_type == CHIP_CARRIZO) ||
>> @@ -4811,6 +4811,8 @@ static void gfx_v8_0_enable_doorbell(struct
>> amdgpu_device *adev, bool enable)
>>       tmp = RREG32(mmCP_PQ_STATUS);
>>       tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
>>       WREG32(mmCP_PQ_STATUS, tmp);
>> +
>> +    adev->doorbell_enabled = true;
>>   }
>>     static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct
>> vi_mqd *mqd)
>> @@ -5108,6 +5110,8 @@ static int gfx_v8_0_cp_resume(struct
>> amdgpu_device *adev)
>>   {
>>       int r;
>>   +    adev->doorbell_enabled = false;
>> +
>>       if (!(adev->flags & AMD_IS_APU))
>>           gfx_v8_0_enable_gui_idle_interrupt(adev, false);
>>
>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]         ` <852b8cfe-d886-e78e-de6c-1641b107ed8f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-02-08 19:32           ` Oded Gabbay
       [not found]             ` <CAFCwf10BboWSwU9HAMjryuLw2K2ANpjC8hGgGwkJh6z8K3pR4Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Oded Gabbay @ 2017-02-08 19:32 UTC (permalink / raw)
  To: Andres Rodriguez
  Cc: Alex Deucher, jay-gJmSnxjMpeIFV7jr3Ov9Ew, Felix Kuehling,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, John Bridgman

On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez <andresx7@gmail.com> wrote:
> Hey Felix,
>
> Thanks for the pointer to the ROCm mqd commit. I like that the workarounds
> are easy to spot. I'll add that to a new patch series I'm working on for
> some bug-fixes for perf being lower on pipes other than pipe 0.
>
> I haven't tested this yet on kaveri/carrizo. I'm hoping someone with the HW
> will be able to give it a go. I put in a few small hacks to get KFD to boot
> but do nothing on polaris10.
>
> Regards,
> Andres
>
>
> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
>>
>> Hi Andres,
>>
>> Thank you for tackling this task. It's more involved than I expected,
>> mostly because I didn't have much awareness of the MQD management in
>> amdgpu.
>>
>> I made one comment in a separate message about the unified MQD commit
>> function, if you want to bring that more in line with our latest ROCm
>> release on github.
>>
>> Also, were you able to test the upstream KFD with your changes on a
>> Kaveri or Carrizo?
>>
>> Regards,
>>   Felix
>>
>>
>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>>>
>>> The current queue/pipe split policy is for amdgpu to take the first pipe
>>> of
>>> MEC0 and leave the rest for amdkfd to use. This policy is taken as an
>>> assumption in a few areas of the implementation.
>>>
>>> This patch series aims to allow for flexible/tunable queue/pipe split
>>> policies
>>> between kgd and kfd. It also updates the queue/pipe split policy to one
>>> that
>>> allows better compute app concurrency for both drivers.
>>>
>>> In the process some duplicate code and hardcoded constants were removed.
>>>
>>> Any suggestions or feedback on improvements welcome.
>>>
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Hi Andres,
I will try to find some time to test it on my Kaveri machine.

Oded
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]             ` <CAFCwf10BboWSwU9HAMjryuLw2K2ANpjC8hGgGwkJh6z8K3pR4Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-02-08 19:47               ` Andres Rodriguez
       [not found]                 ` <50aecc56-f080-d343-9e49-e3955ba1212e-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-08 19:47 UTC (permalink / raw)
  To: Oded Gabbay
  Cc: Alex Deucher, jay-gJmSnxjMpeIFV7jr3Ov9Ew, Felix Kuehling,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, John Bridgman

Thank you Oded.

- Andres

On 2017-02-08 02:32 PM, Oded Gabbay wrote:
> On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez <andresx7@gmail.com> wrote:
>> Hey Felix,
>>
>> Thanks for the pointer to the ROCm mqd commit. I like that the workarounds
>> are easy to spot. I'll add that to a new patch series I'm working on for
>> some bug-fixes for perf being lower on pipes other than pipe 0.
>>
>> I haven't tested this yet on kaveri/carrizo. I'm hoping someone with the HW
>> will be able to give it a go. I put in a few small hacks to get KFD to boot
>> but do nothing on polaris10.
>>
>> Regards,
>> Andres
>>
>>
>> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
>>> Hi Andres,
>>>
>>> Thank you for tackling this task. It's more involved than I expected,
>>> mostly because I didn't have much awareness of the MQD management in
>>> amdgpu.
>>>
>>> I made one comment in a separate message about the unified MQD commit
>>> function, if you want to bring that more in line with our latest ROCm
>>> release on github.
>>>
>>> Also, were you able to test the upstream KFD with your changes on a
>>> Kaveri or Carrizo?
>>>
>>> Regards,
>>>    Felix
>>>
>>>
>>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>>>> The current queue/pipe split policy is for amdgpu to take the first pipe
>>>> of
>>>> MEC0 and leave the rest for amdkfd to use. This policy is taken as an
>>>> assumption in a few areas of the implementation.
>>>>
>>>> This patch series aims to allow for flexible/tunable queue/pipe split
>>>> policies
>>>> between kgd and kfd. It also updates the queue/pipe split policy to one
>>>> that
>>>> allows better compute app concurrency for both drivers.
>>>>
>>>> In the process some duplicate code and hardcoded constants were removed.
>>>>
>>>> Any suggestions or feedback on improvements welcome.
>>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> Hi Andres,
> I will try to find sometime to test it on my Kaveri machine.
>
> Oded

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]                 ` <50aecc56-f080-d343-9e49-e3955ba1212e-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-02-09 20:11                   ` Oded Gabbay
       [not found]                     ` <CAFCwf10L8=mwxCup6-S5Yirxit8MJEZR=rhLPF3NVnotGCSYiQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Oded Gabbay @ 2017-02-09 20:11 UTC (permalink / raw)
  To: Andres Rodriguez
  Cc: Alex Deucher, Jay Cornwall, Felix Kuehling,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, John Bridgman

 Andres,

I tried your patches on Kaveri with airlied's drm-next branch.
I used radeon+amdkfd

The following test failed: KFDQMTest.CreateMultipleCpQueues
However, I can't debug it because I don't have the sources of kfdtest.

In dmesg, I saw the following warning during boot:
WARNING: CPU: 0 PID: 150 at
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c:670
start_cpsch+0xc5/0x220 [amdkfd]
[    4.393796] Modules linked in: hid_logitech_hidpp hid_logitech_dj
hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon(+)
i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect sysimgblt
libahci fb_sys_fops drm r8169 mii fjes video
[    4.393811] CPU: 0 PID: 150 Comm: systemd-udevd Not tainted 4.10.0-rc5+ #1
[    4.393811] Hardware name: Gigabyte Technology Co., Ltd. To be
filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
[    4.393812] Call Trace:
[    4.393818]  dump_stack+0x63/0x90
[    4.393822]  __warn+0xcb/0xf0
[    4.393823]  warn_slowpath_null+0x1d/0x20
[    4.393830]  start_cpsch+0xc5/0x220 [amdkfd]
[    4.393836]  ? initialize_cpsch+0xa0/0xb0 [amdkfd]
[    4.393841]  kgd2kfd_device_init+0x375/0x490 [amdkfd]
[    4.393883]  radeon_kfd_device_init+0xaf/0xd0 [radeon]
[    4.393911]  radeon_driver_load_kms+0x11e/0x1f0 [radeon]
[    4.393933]  drm_dev_register+0x14a/0x200 [drm]
[    4.393946]  drm_get_pci_dev+0x9d/0x160 [drm]
[    4.393974]  radeon_pci_probe+0xb8/0xe0 [radeon]
[    4.393976]  local_pci_probe+0x45/0xa0
[    4.393978]  pci_device_probe+0x103/0x150
[    4.393981]  driver_probe_device+0x2bf/0x460
[    4.393982]  __driver_attach+0xdf/0xf0
[    4.393984]  ? driver_probe_device+0x460/0x460
[    4.393985]  bus_for_each_dev+0x6c/0xc0
[    4.393987]  driver_attach+0x1e/0x20
[    4.393988]  bus_add_driver+0x1fd/0x270
[    4.393989]  ? 0xffffffffc05c8000
[    4.393991]  driver_register+0x60/0xe0
[    4.393992]  ? 0xffffffffc05c8000
[    4.393993]  __pci_register_driver+0x4c/0x50
[    4.394007]  drm_pci_init+0xeb/0x100 [drm]
[    4.394008]  ? 0xffffffffc05c8000
[    4.394031]  radeon_init+0x98/0xb6 [radeon]
[    4.394034]  do_one_initcall+0x53/0x1a0
[    4.394037]  ? __vunmap+0x81/0xd0
[    4.394039]  ? kmem_cache_alloc_trace+0x152/0x1c0
[    4.394041]  ? vfree+0x2e/0x70
[    4.394044]  do_init_module+0x5f/0x1ff
[    4.394046]  load_module+0x24cc/0x29f0
[    4.394047]  ? __symbol_put+0x60/0x60
[    4.394050]  ? security_kernel_post_read_file+0x6b/0x80
[    4.394052]  SYSC_finit_module+0xdf/0x110
[    4.394054]  SyS_finit_module+0xe/0x10
[    4.394056]  entry_SYSCALL_64_fastpath+0x1e/0xad
[    4.394058] RIP: 0033:0x7f9cda77c8e9
[    4.394059] RSP: 002b:00007ffe195d3378 EFLAGS: 00000246 ORIG_RAX:
0000000000000139
[    4.394060] RAX: ffffffffffffffda RBX: 00007f9cdb8dda7e RCX: 00007f9cda77c8e9
[    4.394061] RDX: 0000000000000000 RSI: 00007f9cdac7ce2a RDI: 0000000000000013
[    4.394062] RBP: 00007ffe195d2450 R08: 0000000000000000 R09: 0000000000000000
[    4.394063] R10: 0000000000000013 R11: 0000000000000246 R12: 00007ffe195d245a
[    4.394063] R13: 00007ffe195d1378 R14: 0000563f70cc93b0 R15: 0000563f70cba4d0
[    4.394091] ---[ end trace 9c5af17304d998bb ]---
[    4.394092] Invalid queue enabled by amdgpu: 9

I suggest you get a Kaveri/Carrizo machine to debug these issues.

Until that, I don't think we should merge this patch-set.

Oded

On Wed, Feb 8, 2017 at 9:47 PM, Andres Rodriguez <andresx7@gmail.com> wrote:
> Thank you Oded.
>
> - Andres
>
>
> On 2017-02-08 02:32 PM, Oded Gabbay wrote:
>>
>> On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez <andresx7@gmail.com>
>> wrote:
>>>
>>> Hey Felix,
>>>
>>> Thanks for the pointer to the ROCm mqd commit. I like that the
>>> workarounds
>>> are easy to spot. I'll add that to a new patch series I'm working on for
>>> some bug-fixes for perf being lower on pipes other than pipe 0.
>>>
>>> I haven't tested this yet on kaveri/carrizo. I'm hoping someone with the
>>> HW
>>> will be able to give it a go. I put in a few small hacks to get KFD to
>>> boot
>>> but do nothing on polaris10.
>>>
>>> Regards,
>>> Andres
>>>
>>>
>>> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
>>>>
>>>> Hi Andres,
>>>>
>>>> Thank you for tackling this task. It's more involved than I expected,
>>>> mostly because I didn't have much awareness of the MQD management in
>>>> amdgpu.
>>>>
>>>> I made one comment in a separate message about the unified MQD commit
>>>> function, if you want to bring that more in line with our latest ROCm
>>>> release on github.
>>>>
>>>> Also, were you able to test the upstream KFD with your changes on a
>>>> Kaveri or Carrizo?
>>>>
>>>> Regards,
>>>>    Felix
>>>>
>>>>
>>>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>>>>>
>>>>> The current queue/pipe split policy is for amdgpu to take the first
>>>>> pipe
>>>>> of
>>>>> MEC0 and leave the rest for amdkfd to use. This policy is taken as an
>>>>> assumption in a few areas of the implementation.
>>>>>
>>>>> This patch series aims to allow for flexible/tunable queue/pipe split
>>>>> policies
>>>>> between kgd and kfd. It also updates the queue/pipe split policy to one
>>>>> that
>>>>> allows better compute app concurrency for both drivers.
>>>>>
>>>>> In the process some duplicate code and hardcoded constants were
>>>>> removed.
>>>>>
>>>>> Any suggestions or feedback on improvements welcome.
>>>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>> Hi Andres,
>> I will try to find sometime to test it on my Kaveri machine.
>>
>> Oded
>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]                     ` <CAFCwf10L8=mwxCup6-S5Yirxit8MJEZR=rhLPF3NVnotGCSYiQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-02-09 20:19                       ` Andres Rodriguez
  2017-02-09 20:38                       ` Andres Rodriguez
  1 sibling, 0 replies; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-09 20:19 UTC (permalink / raw)
  To: Oded Gabbay
  Cc: Alex Deucher, Jay Cornwall, Felix Kuehling, amd-gfx list, John Bridgman


[-- Attachment #1.1: Type: text/plain, Size: 6156 bytes --]

Thanks Oded for the test results.

I'll work on a fix.

Regards,
Andres

On Thu, Feb 9, 2017 at 3:11 PM, Oded Gabbay <oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:

>  Andres,
>
> I tried your patches on Kaveri with airlied's drm-next branch.
> I used radeon+amdkfd
>
> The following test failed: KFDQMTest.CreateMultipleCpQueues
> However, I can't debug it because I don't have the sources of kfdtest.
>
> In dmesg, I saw the following warning during boot:
> WARNING: CPU: 0 PID: 150 at
> drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c:670
> start_cpsch+0xc5/0x220 [amdkfd]
> [    4.393796] Modules linked in: hid_logitech_hidpp hid_logitech_dj
> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon(+)
> i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect sysimgblt
> libahci fb_sys_fops drm r8169 mii fjes video
> [    4.393811] CPU: 0 PID: 150 Comm: systemd-udevd Not tainted 4.10.0-rc5+
> #1
> [    4.393811] Hardware name: Gigabyte Technology Co., Ltd. To be
> filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
> [    4.393812] Call Trace:
> [    4.393818]  dump_stack+0x63/0x90
> [    4.393822]  __warn+0xcb/0xf0
> [    4.393823]  warn_slowpath_null+0x1d/0x20
> [    4.393830]  start_cpsch+0xc5/0x220 [amdkfd]
> [    4.393836]  ? initialize_cpsch+0xa0/0xb0 [amdkfd]
> [    4.393841]  kgd2kfd_device_init+0x375/0x490 [amdkfd]
> [    4.393883]  radeon_kfd_device_init+0xaf/0xd0 [radeon]
> [    4.393911]  radeon_driver_load_kms+0x11e/0x1f0 [radeon]
> [    4.393933]  drm_dev_register+0x14a/0x200 [drm]
> [    4.393946]  drm_get_pci_dev+0x9d/0x160 [drm]
> [    4.393974]  radeon_pci_probe+0xb8/0xe0 [radeon]
> [    4.393976]  local_pci_probe+0x45/0xa0
> [    4.393978]  pci_device_probe+0x103/0x150
> [    4.393981]  driver_probe_device+0x2bf/0x460
> [    4.393982]  __driver_attach+0xdf/0xf0
> [    4.393984]  ? driver_probe_device+0x460/0x460
> [    4.393985]  bus_for_each_dev+0x6c/0xc0
> [    4.393987]  driver_attach+0x1e/0x20
> [    4.393988]  bus_add_driver+0x1fd/0x270
> [    4.393989]  ? 0xffffffffc05c8000
> [    4.393991]  driver_register+0x60/0xe0
> [    4.393992]  ? 0xffffffffc05c8000
> [    4.393993]  __pci_register_driver+0x4c/0x50
> [    4.394007]  drm_pci_init+0xeb/0x100 [drm]
> [    4.394008]  ? 0xffffffffc05c8000
> [    4.394031]  radeon_init+0x98/0xb6 [radeon]
> [    4.394034]  do_one_initcall+0x53/0x1a0
> [    4.394037]  ? __vunmap+0x81/0xd0
> [    4.394039]  ? kmem_cache_alloc_trace+0x152/0x1c0
> [    4.394041]  ? vfree+0x2e/0x70
> [    4.394044]  do_init_module+0x5f/0x1ff
> [    4.394046]  load_module+0x24cc/0x29f0
> [    4.394047]  ? __symbol_put+0x60/0x60
> [    4.394050]  ? security_kernel_post_read_file+0x6b/0x80
> [    4.394052]  SYSC_finit_module+0xdf/0x110
> [    4.394054]  SyS_finit_module+0xe/0x10
> [    4.394056]  entry_SYSCALL_64_fastpath+0x1e/0xad
> [    4.394058] RIP: 0033:0x7f9cda77c8e9
> [    4.394059] RSP: 002b:00007ffe195d3378 EFLAGS: 00000246 ORIG_RAX:
> 0000000000000139
> [    4.394060] RAX: ffffffffffffffda RBX: 00007f9cdb8dda7e RCX:
> 00007f9cda77c8e9
> [    4.394061] RDX: 0000000000000000 RSI: 00007f9cdac7ce2a RDI:
> 0000000000000013
> [    4.394062] RBP: 00007ffe195d2450 R08: 0000000000000000 R09:
> 0000000000000000
> [    4.394063] R10: 0000000000000013 R11: 0000000000000246 R12:
> 00007ffe195d245a
> [    4.394063] R13: 00007ffe195d1378 R14: 0000563f70cc93b0 R15:
> 0000563f70cba4d0
> [    4.394091] ---[ end trace 9c5af17304d998bb ]---
> [    4.394092] Invalid queue enabled by amdgpu: 9
>
> I suggest you get a Kaveri/Carrizo machine to debug these issues.
>
> Until that, I don't think we should merge this patch-set.
>
> Oded
>
> On Wed, Feb 8, 2017 at 9:47 PM, Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> wrote:
> > Thank you Oded.
> >
> > - Andres
> >
> >
> > On 2017-02-08 02:32 PM, Oded Gabbay wrote:
> >>
> >> On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
> >> wrote:
> >>>
> >>> Hey Felix,
> >>>
> >>> Thanks for the pointer to the ROCm mqd commit. I like that the
> >>> workarounds
> >>> are easy to spot. I'll add that to a new patch series I'm working on
> for
> >>> some bug-fixes for perf being lower on pipes other than pipe 0.
> >>>
> >>> I haven't tested this yet on kaveri/carrizo. I'm hoping someone with
> the
> >>> HW
> >>> will be able to give it a go. I put in a few small hacks to get KFD to
> >>> boot
> >>> but do nothing on polaris10.
> >>>
> >>> Regards,
> >>> Andres
> >>>
> >>>
> >>> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
> >>>>
> >>>> Hi Andres,
> >>>>
> >>>> Thank you for tackling this task. It's more involved than I expected,
> >>>> mostly because I didn't have much awareness of the MQD management in
> >>>> amdgpu.
> >>>>
> >>>> I made one comment in a separate message about the unified MQD commit
> >>>> function, if you want to bring that more in line with our latest ROCm
> >>>> release on github.
> >>>>
> >>>> Also, were you able to test the upstream KFD with your changes on a
> >>>> Kaveri or Carrizo?
> >>>>
> >>>> Regards,
> >>>>    Felix
> >>>>
> >>>>
> >>>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
> >>>>>
> >>>>> The current queue/pipe split policy is for amdgpu to take the first
> >>>>> pipe
> >>>>> of
> >>>>> MEC0 and leave the rest for amdkfd to use. This policy is taken as an
> >>>>> assumption in a few areas of the implementation.
> >>>>>
> >>>>> This patch series aims to allow for flexible/tunable queue/pipe split
> >>>>> policies
> >>>>> between kgd and kfd. It also updates the queue/pipe split policy to
> one
> >>>>> that
> >>>>> allows better compute app concurrency for both drivers.
> >>>>>
> >>>>> In the process some duplicate code and hardcoded constants were
> >>>>> removed.
> >>>>>
> >>>>> Any suggestions or feedback on improvements welcome.
> >>>>>
> >>> _______________________________________________
> >>> amd-gfx mailing list
> >>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> >>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> >>
> >> Hi Andres,
> >> I will try to find sometime to test it on my Kaveri machine.
> >>
> >> Oded
> >
> >
>

[-- Attachment #1.2: Type: text/html, Size: 8408 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]                     ` <CAFCwf10L8=mwxCup6-S5Yirxit8MJEZR=rhLPF3NVnotGCSYiQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2017-02-09 20:19                       ` Andres Rodriguez
@ 2017-02-09 20:38                       ` Andres Rodriguez
       [not found]                         ` <8e51b688-d978-d40f-8aa3-ae1090ab6a03-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-09 20:38 UTC (permalink / raw)
  To: Oded Gabbay, Felix Kuehling, John Bridgman
  Cc: Alex Deucher, Jay Cornwall, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hey Oded,

Sorry to be a nuisance, but if you have everything still setup could you 
give this fix a quick go?

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 5321d18..9f70ee0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -667,7 +667,7 @@ static int set_sched_resources(struct 
device_queue_manager *dqm)
                 /* This situation may be hit in the future if a new HW
                  * generation exposes more than 64 queues. If so, the
                  * definition of res.queue_mask needs updating */
-               if (WARN_ON(i > sizeof(res.queue_mask))) {
+               if (WARN_ON(i > (sizeof(res.queue_mask)*8))) {
                         pr_err("Invalid queue enabled by amdgpu: %d\n", i);
                         break;
                 }

John/Felix,

Any chance I could borrow a carrizo/kaveri for a few days? Or maybe you 
could help me run some final tests on this patch series?

- Andres


On 2017-02-09 03:11 PM, Oded Gabbay wrote:
>   Andres,
>
> I tried your patches on Kaveri with airlied's drm-next branch.
> I used radeon+amdkfd
>
> The following test failed: KFDQMTest.CreateMultipleCpQueues
> However, I can't debug it because I don't have the sources of kfdtest.
>
> In dmesg, I saw the following warning during boot:
> WARNING: CPU: 0 PID: 150 at
> drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c:670
> start_cpsch+0xc5/0x220 [amdkfd]
> [    4.393796] Modules linked in: hid_logitech_hidpp hid_logitech_dj
> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon(+)
> i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect sysimgblt
> libahci fb_sys_fops drm r8169 mii fjes video
> [    4.393811] CPU: 0 PID: 150 Comm: systemd-udevd Not tainted 4.10.0-rc5+ #1
> [    4.393811] Hardware name: Gigabyte Technology Co., Ltd. To be
> filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
> [    4.393812] Call Trace:
> [    4.393818]  dump_stack+0x63/0x90
> [    4.393822]  __warn+0xcb/0xf0
> [    4.393823]  warn_slowpath_null+0x1d/0x20
> [    4.393830]  start_cpsch+0xc5/0x220 [amdkfd]
> [    4.393836]  ? initialize_cpsch+0xa0/0xb0 [amdkfd]
> [    4.393841]  kgd2kfd_device_init+0x375/0x490 [amdkfd]
> [    4.393883]  radeon_kfd_device_init+0xaf/0xd0 [radeon]
> [    4.393911]  radeon_driver_load_kms+0x11e/0x1f0 [radeon]
> [    4.393933]  drm_dev_register+0x14a/0x200 [drm]
> [    4.393946]  drm_get_pci_dev+0x9d/0x160 [drm]
> [    4.393974]  radeon_pci_probe+0xb8/0xe0 [radeon]
> [    4.393976]  local_pci_probe+0x45/0xa0
> [    4.393978]  pci_device_probe+0x103/0x150
> [    4.393981]  driver_probe_device+0x2bf/0x460
> [    4.393982]  __driver_attach+0xdf/0xf0
> [    4.393984]  ? driver_probe_device+0x460/0x460
> [    4.393985]  bus_for_each_dev+0x6c/0xc0
> [    4.393987]  driver_attach+0x1e/0x20
> [    4.393988]  bus_add_driver+0x1fd/0x270
> [    4.393989]  ? 0xffffffffc05c8000
> [    4.393991]  driver_register+0x60/0xe0
> [    4.393992]  ? 0xffffffffc05c8000
> [    4.393993]  __pci_register_driver+0x4c/0x50
> [    4.394007]  drm_pci_init+0xeb/0x100 [drm]
> [    4.394008]  ? 0xffffffffc05c8000
> [    4.394031]  radeon_init+0x98/0xb6 [radeon]
> [    4.394034]  do_one_initcall+0x53/0x1a0
> [    4.394037]  ? __vunmap+0x81/0xd0
> [    4.394039]  ? kmem_cache_alloc_trace+0x152/0x1c0
> [    4.394041]  ? vfree+0x2e/0x70
> [    4.394044]  do_init_module+0x5f/0x1ff
> [    4.394046]  load_module+0x24cc/0x29f0
> [    4.394047]  ? __symbol_put+0x60/0x60
> [    4.394050]  ? security_kernel_post_read_file+0x6b/0x80
> [    4.394052]  SYSC_finit_module+0xdf/0x110
> [    4.394054]  SyS_finit_module+0xe/0x10
> [    4.394056]  entry_SYSCALL_64_fastpath+0x1e/0xad
> [    4.394058] RIP: 0033:0x7f9cda77c8e9
> [    4.394059] RSP: 002b:00007ffe195d3378 EFLAGS: 00000246 ORIG_RAX:
> 0000000000000139
> [    4.394060] RAX: ffffffffffffffda RBX: 00007f9cdb8dda7e RCX: 00007f9cda77c8e9
> [    4.394061] RDX: 0000000000000000 RSI: 00007f9cdac7ce2a RDI: 0000000000000013
> [    4.394062] RBP: 00007ffe195d2450 R08: 0000000000000000 R09: 0000000000000000
> [    4.394063] R10: 0000000000000013 R11: 0000000000000246 R12: 00007ffe195d245a
> [    4.394063] R13: 00007ffe195d1378 R14: 0000563f70cc93b0 R15: 0000563f70cba4d0
> [    4.394091] ---[ end trace 9c5af17304d998bb ]---
> [    4.394092] Invalid queue enabled by amdgpu: 9
>
> I suggest you get a Kaveri/Carrizo machine to debug these issues.
>
> Until that, I don't think we should merge this patch-set.
>
> Oded
>
> On Wed, Feb 8, 2017 at 9:47 PM, Andres Rodriguez <andresx7@gmail.com> wrote:
>> Thank you Oded.
>>
>> - Andres
>>
>>
>> On 2017-02-08 02:32 PM, Oded Gabbay wrote:
>>> On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez <andresx7@gmail.com>
>>> wrote:
>>>> Hey Felix,
>>>>
>>>> Thanks for the pointer to the ROCm mqd commit. I like that the
>>>> workarounds
>>>> are easy to spot. I'll add that to a new patch series I'm working on for
>>>> some bug-fixes for perf being lower on pipes other than pipe 0.
>>>>
>>>> I haven't tested this yet on kaveri/carrizo. I'm hoping someone with the
>>>> HW
>>>> will be able to give it a go. I put in a few small hacks to get KFD to
>>>> boot
>>>> but do nothing on polaris10.
>>>>
>>>> Regards,
>>>> Andres
>>>>
>>>>
>>>> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
>>>>> Hi Andres,
>>>>>
>>>>> Thank you for tackling this task. It's more involved than I expected,
>>>>> mostly because I didn't have much awareness of the MQD management in
>>>>> amdgpu.
>>>>>
>>>>> I made one comment in a separate message about the unified MQD commit
>>>>> function, if you want to bring that more in line with our latest ROCm
>>>>> release on github.
>>>>>
>>>>> Also, were you able to test the upstream KFD with your changes on a
>>>>> Kaveri or Carrizo?
>>>>>
>>>>> Regards,
>>>>>     Felix
>>>>>
>>>>>
>>>>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>>>>>> The current queue/pipe split policy is for amdgpu to take the first
>>>>>> pipe
>>>>>> of
>>>>>> MEC0 and leave the rest for amdkfd to use. This policy is taken as an
>>>>>> assumption in a few areas of the implementation.
>>>>>>
>>>>>> This patch series aims to allow for flexible/tunable queue/pipe split
>>>>>> policies
>>>>>> between kgd and kfd. It also updates the queue/pipe split policy to one
>>>>>> that
>>>>>> allows better compute app concurrency for both drivers.
>>>>>>
>>>>>> In the process some duplicate code and hardcoded constants were
>>>>>> removed.
>>>>>>
>>>>>> Any suggestions or feedback on improvements welcome.
>>>>>>
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>> Hi Andres,
>>> I will try to find sometime to test it on my Kaveri machine.
>>>
>>> Oded
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]                         ` <8e51b688-d978-d40f-8aa3-ae1090ab6a03-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-02-10 10:35                           ` Oded Gabbay
       [not found]                             ` <CAFCwf10agJ+C0X-jvPa5jSJcR--+u3-TC6LuVQwW1+o94uGonA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Oded Gabbay @ 2017-02-10 10:35 UTC (permalink / raw)
  To: Andres Rodriguez
  Cc: Alex Deucher, Jay Cornwall, Felix Kuehling,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, John Bridgman

So the warning in dmesg is gone of course, but the test (that I
mentioned in previous email) still fails, and this time it caused the
kernel to crash. In addition, now other tests fail as well, e.g.
KFDEventTest.SignalEvent

I honestly suggest to take some time to debug this patch-set on an
actual Kaveri machine and then re-send the patches.

Thanks,
Oded

log of crash from KFDQMTest.CreateMultipleCpQueues:

[  160.900137] kfd: qcm fence wait loop timeout expired
[  160.900143] kfd: the cp might be in an unrecoverable state due to
an unsuccessful queues preemption
[  160.916765] show_signal_msg: 36 callbacks suppressed
[  160.916771] kfdtest[2498]: segfault at 100007f8a ip
00007f8ae932ee5d sp 00007ffc52219cd0 error 4 in
libhsakmt-1.so.0.0.1[7f8ae932b000+8000]
[  163.152229] kfd: qcm fence wait loop timeout expired
[  163.152250] BUG: unable to handle kernel NULL pointer dereference
at 000000000000005a
[  163.152299] IP: kfd_get_process_device_data+0x6/0x30 [amdkfd]
[  163.152323] PGD 2333aa067
[  163.152323] PUD 230f64067
[  163.152335] PMD 0

[  163.152364] Oops: 0000 [#1] SMP
[  163.152379] Modules linked in: joydev edac_mce_amd edac_core
input_leds kvm_amd snd_hda_codec_realtek kvm irqbypass
snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel snd_hda_codec
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_core
snd_hwdep pcbc snd_pcm aesni_intel snd_seq_midi snd_seq_midi_event
snd_rawmidi snd_seq aes_x86_64 crypto_simd snd_seq_device glue_helper
cryptd snd_timer snd fam15h_power k10temp soundcore i2c_piix4 shpchp
tpm_infineon mac_hid parport_pc ppdev nfsd auth_rpcgss nfs_acl lockd
lp grace sunrpc parport autofs4 hid_logitech_hidpp hid_logitech_dj
hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon
i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect sysimgblt
libahci fb_sys_fops drm r8169 mii fjes video
[  163.152668] CPU: 3 PID: 2498 Comm: kfdtest Not tainted 4.10.0-rc5+ #3
[  163.152695] Hardware name: Gigabyte Technology Co., Ltd. To be
filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
[  163.152735] task: ffff995e73d16580 task.stack: ffffb41144458000
[  163.152764] RIP: 0010:kfd_get_process_device_data+0x6/0x30 [amdkfd]
[  163.152790] RSP: 0018:ffffb4114445bab0 EFLAGS: 00010246
[  163.152812] RAX: ffffffffffffffea RBX: ffff995e75909c00 RCX: 0000000000000000
[  163.152841] RDX: 0000000000000000 RSI: ffffffffffffffea RDI: ffff995e75909600
[  163.152869] RBP: ffffb4114445bae0 R08: 00000000000252a5 R09: 0000000000000414
[  163.152898] R10: 0000000000000000 R11: ffffffffb412d38d R12: 00000000ffffffc2
[  163.152926] R13: 0000000000000000 R14: ffff995e75909ca8 R15: ffff995e75909c00
[  163.152956] FS:  00007f8ae975e740(0000) GS:ffff995e7ed80000(0000)
knlGS:0000000000000000
[  163.152988] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  163.153012] CR2: 000000000000005a CR3: 00000002216ab000 CR4: 00000000000406e0
[  163.153041] Call Trace:
[  163.153059]  ? destroy_queues_cpsch+0x166/0x190 [amdkfd]
[  163.153086]  execute_queues_cpsch+0x2e/0xc0 [amdkfd]
[  163.153113]  destroy_queue_cpsch+0xbd/0x140 [amdkfd]
[  163.153139]  pqm_destroy_queue+0x111/0x1d0 [amdkfd]
[  163.153164]  pqm_uninit+0x3f/0xb0 [amdkfd]
[  163.153186]  kfd_unbind_process_from_device+0x51/0xd0 [amdkfd]
[  163.153214]  iommu_pasid_shutdown_callback+0x20/0x30 [amdkfd]
[  163.153239]  mn_release+0x37/0x70 [amd_iommu_v2]
[  163.153261]  __mmu_notifier_release+0x44/0xc0
[  163.153281]  exit_mmap+0x15a/0x170
[  163.153297]  ? __wake_up+0x44/0x50
[  163.153314]  ? exit_robust_list+0x5c/0x110
[  163.153333]  mmput+0x57/0x140
[  163.153347]  do_exit+0x26b/0xb30
[  163.153362]  do_group_exit+0x43/0xb0
[  163.153379]  get_signal+0x293/0x620
[  163.153396]  do_signal+0x37/0x760
[  163.153411]  ? print_vma_addr+0x82/0x100
[  163.153429]  ? vprintk_default+0x29/0x50
[  163.153447]  ? bad_area+0x46/0x50
[  163.153463]  ? __do_page_fault+0x3c7/0x4e0
[  163.153481]  exit_to_usermode_loop+0x76/0xb0
[  163.153500]  prepare_exit_to_usermode+0x2f/0x40
[  163.153521]  retint_user+0x8/0x10
[  163.153536] RIP: 0033:0x7f8ae932ee5d
[  163.153551] RSP: 002b:00007ffc52219cd0 EFLAGS: 00010202
[  163.153573] RAX: 0000000000000003 RBX: 0000000100007f8a RCX: 00007ffc52219d00
[  163.153602] RDX: 00007f8ae9534220 RSI: 00007f8ae8b5eb28 RDI: 0000000100007f8a
[  163.153630] RBP: 00007ffc52219d20 R08: 0000000001cc1890 R09: 0000000000000000
[  163.153659] R10: 0000000000000027 R11: 00007f8ae932ee10 R12: 0000000001cc52a0
[  163.153687] R13: 00007ffc5221a200 R14: 0000000000000021 R15: 0000000000000000
[  163.153716] Code: e0 04 00 00 48 3b 91 f0 03 00 00 74 01 c3 55 48
89 e5 e8 2e f9 ff ff 5d c3 66 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f
44 00 00 55 <48> 8b 46 70 48 83 c6 70 48 89 e5 48 39 f0 74 16 48 3b 78
10 75
[  163.153818] RIP: kfd_get_process_device_data+0x6/0x30 [amdkfd] RSP:
ffffb4114445bab0
[  163.153848] CR2: 000000000000005a
[  163.160389] ---[ end trace f6a8177c7119c1f5 ]---
[  163.160390] Fixing recursive fault but reboot is needed!

On Thu, Feb 9, 2017 at 10:38 PM, Andres Rodriguez <andresx7@gmail.com> wrote:
> Hey Oded,
>
> Sorry to be a nuisance, but if you have everything still setup could you
> give this fix a quick go?
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 5321d18..9f70ee0 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -667,7 +667,7 @@ static int set_sched_resources(struct
> device_queue_manager *dqm)
>                 /* This situation may be hit in the future if a new HW
>                  * generation exposes more than 64 queues. If so, the
>                  * definition of res.queue_mask needs updating */
> -               if (WARN_ON(i > sizeof(res.queue_mask))) {
> +               if (WARN_ON(i > (sizeof(res.queue_mask)*8))) {
>                         pr_err("Invalid queue enabled by amdgpu: %d\n", i);
>                         break;
>                 }
>
> John/Felix,
>
> Any chance I could borrow a carrizo/kaveri for a few days? Or maybe you
> could help me run some final tests on this patch series?
>
> - Andres
>
>
>
> On 2017-02-09 03:11 PM, Oded Gabbay wrote:
>>
>>   Andres,
>>
>> I tried your patches on Kaveri with airlied's drm-next branch.
>> I used radeon+amdkfd
>>
>> The following test failed: KFDQMTest.CreateMultipleCpQueues
>> However, I can't debug it because I don't have the sources of kfdtest.
>>
>> In dmesg, I saw the following warning during boot:
>> WARNING: CPU: 0 PID: 150 at
>> drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c:670
>> start_cpsch+0xc5/0x220 [amdkfd]
>> [    4.393796] Modules linked in: hid_logitech_hidpp hid_logitech_dj
>> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon(+)
>> i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect sysimgblt
>> libahci fb_sys_fops drm r8169 mii fjes video
>> [    4.393811] CPU: 0 PID: 150 Comm: systemd-udevd Not tainted 4.10.0-rc5+
>> #1
>> [    4.393811] Hardware name: Gigabyte Technology Co., Ltd. To be
>> filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
>> [    4.393812] Call Trace:
>> [    4.393818]  dump_stack+0x63/0x90
>> [    4.393822]  __warn+0xcb/0xf0
>> [    4.393823]  warn_slowpath_null+0x1d/0x20
>> [    4.393830]  start_cpsch+0xc5/0x220 [amdkfd]
>> [    4.393836]  ? initialize_cpsch+0xa0/0xb0 [amdkfd]
>> [    4.393841]  kgd2kfd_device_init+0x375/0x490 [amdkfd]
>> [    4.393883]  radeon_kfd_device_init+0xaf/0xd0 [radeon]
>> [    4.393911]  radeon_driver_load_kms+0x11e/0x1f0 [radeon]
>> [    4.393933]  drm_dev_register+0x14a/0x200 [drm]
>> [    4.393946]  drm_get_pci_dev+0x9d/0x160 [drm]
>> [    4.393974]  radeon_pci_probe+0xb8/0xe0 [radeon]
>> [    4.393976]  local_pci_probe+0x45/0xa0
>> [    4.393978]  pci_device_probe+0x103/0x150
>> [    4.393981]  driver_probe_device+0x2bf/0x460
>> [    4.393982]  __driver_attach+0xdf/0xf0
>> [    4.393984]  ? driver_probe_device+0x460/0x460
>> [    4.393985]  bus_for_each_dev+0x6c/0xc0
>> [    4.393987]  driver_attach+0x1e/0x20
>> [    4.393988]  bus_add_driver+0x1fd/0x270
>> [    4.393989]  ? 0xffffffffc05c8000
>> [    4.393991]  driver_register+0x60/0xe0
>> [    4.393992]  ? 0xffffffffc05c8000
>> [    4.393993]  __pci_register_driver+0x4c/0x50
>> [    4.394007]  drm_pci_init+0xeb/0x100 [drm]
>> [    4.394008]  ? 0xffffffffc05c8000
>> [    4.394031]  radeon_init+0x98/0xb6 [radeon]
>> [    4.394034]  do_one_initcall+0x53/0x1a0
>> [    4.394037]  ? __vunmap+0x81/0xd0
>> [    4.394039]  ? kmem_cache_alloc_trace+0x152/0x1c0
>> [    4.394041]  ? vfree+0x2e/0x70
>> [    4.394044]  do_init_module+0x5f/0x1ff
>> [    4.394046]  load_module+0x24cc/0x29f0
>> [    4.394047]  ? __symbol_put+0x60/0x60
>> [    4.394050]  ? security_kernel_post_read_file+0x6b/0x80
>> [    4.394052]  SYSC_finit_module+0xdf/0x110
>> [    4.394054]  SyS_finit_module+0xe/0x10
>> [    4.394056]  entry_SYSCALL_64_fastpath+0x1e/0xad
>> [    4.394058] RIP: 0033:0x7f9cda77c8e9
>> [    4.394059] RSP: 002b:00007ffe195d3378 EFLAGS: 00000246 ORIG_RAX:
>> 0000000000000139
>> [    4.394060] RAX: ffffffffffffffda RBX: 00007f9cdb8dda7e RCX:
>> 00007f9cda77c8e9
>> [    4.394061] RDX: 0000000000000000 RSI: 00007f9cdac7ce2a RDI:
>> 0000000000000013
>> [    4.394062] RBP: 00007ffe195d2450 R08: 0000000000000000 R09:
>> 0000000000000000
>> [    4.394063] R10: 0000000000000013 R11: 0000000000000246 R12:
>> 00007ffe195d245a
>> [    4.394063] R13: 00007ffe195d1378 R14: 0000563f70cc93b0 R15:
>> 0000563f70cba4d0
>> [    4.394091] ---[ end trace 9c5af17304d998bb ]---
>> [    4.394092] Invalid queue enabled by amdgpu: 9
>>
>> I suggest you get a Kaveri/Carrizo machine to debug these issues.
>>
>> Until that, I don't think we should merge this patch-set.
>>
>> Oded
>>
>> On Wed, Feb 8, 2017 at 9:47 PM, Andres Rodriguez <andresx7@gmail.com>
>> wrote:
>>>
>>> Thank you Oded.
>>>
>>> - Andres
>>>
>>>
>>> On 2017-02-08 02:32 PM, Oded Gabbay wrote:
>>>>
>>>> On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez <andresx7@gmail.com>
>>>> wrote:
>>>>>
>>>>> Hey Felix,
>>>>>
>>>>> Thanks for the pointer to the ROCm mqd commit. I like that the
>>>>> workarounds
>>>>> are easy to spot. I'll add that to a new patch series I'm working on
>>>>> for
>>>>> some bug-fixes for perf being lower on pipes other than pipe 0.
>>>>>
>>>>> I haven't tested this yet on kaveri/carrizo. I'm hoping someone with
>>>>> the
>>>>> HW
>>>>> will be able to give it a go. I put in a few small hacks to get KFD to
>>>>> boot
>>>>> but do nothing on polaris10.
>>>>>
>>>>> Regards,
>>>>> Andres
>>>>>
>>>>>
>>>>> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
>>>>>>
>>>>>> Hi Andres,
>>>>>>
>>>>>> Thank you for tackling this task. It's more involved than I expected,
>>>>>> mostly because I didn't have much awareness of the MQD management in
>>>>>> amdgpu.
>>>>>>
>>>>>> I made one comment in a separate message about the unified MQD commit
>>>>>> function, if you want to bring that more in line with our latest ROCm
>>>>>> release on github.
>>>>>>
>>>>>> Also, were you able to test the upstream KFD with your changes on a
>>>>>> Kaveri or Carrizo?
>>>>>>
>>>>>> Regards,
>>>>>>     Felix
>>>>>>
>>>>>>
>>>>>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>>>>>>>
>>>>>>> The current queue/pipe split policy is for amdgpu to take the first
>>>>>>> pipe
>>>>>>> of
>>>>>>> MEC0 and leave the rest for amdkfd to use. This policy is taken as an
>>>>>>> assumption in a few areas of the implementation.
>>>>>>>
>>>>>>> This patch series aims to allow for flexible/tunable queue/pipe split
>>>>>>> policies
>>>>>>> between kgd and kfd. It also updates the queue/pipe split policy to
>>>>>>> one
>>>>>>> that
>>>>>>> allows better compute app concurrency for both drivers.
>>>>>>>
>>>>>>> In the process some duplicate code and hardcoded constants were
>>>>>>> removed.
>>>>>>>
>>>>>>> Any suggestions or feedback on improvements welcome.
>>>>>>>
>>>>> _______________________________________________
>>>>> amd-gfx mailing list
>>>>> amd-gfx@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>
>>>> Hi Andres,
>>>> I will try to find some time to test it on my Kaveri machine.
>>>>
>>>> Oded
>>>
>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]                             ` <CAFCwf10agJ+C0X-jvPa5jSJcR--+u3-TC6LuVQwW1+o94uGonA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-02-10 16:33                               ` Andres Rodriguez
       [not found]                                 ` <37bd1bf7-4db6-6004-f61d-5084efd1ec83-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Andres Rodriguez @ 2017-02-10 16:33 UTC (permalink / raw)
  To: Oded Gabbay
  Cc: Alex Deucher, Jay Cornwall, Felix Kuehling,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, John Bridgman

Hey Oded,

Where can I find a repo with kfdtest?

I tried looking here but couldn't find it:

https://cgit.freedesktop.org/~gabbayo/

-Andres


On 2017-02-10 05:35 AM, Oded Gabbay wrote:
> So the warning in dmesg is gone of course, but the test (that I
> mentioned in previous email) still fails, and this time it caused the
> kernel to crash. In addition, now other tests fail as well, e.g.
> KFDEventTest.SignalEvent
>
> I honestly suggest to take some time to debug this patch-set on an
> actual Kaveri machine and then re-send the patches.
>
> Thanks,
> Oded
>
> log of crash from KFDQMTest.CreateMultipleCpQueues:
>
> [  160.900137] kfd: qcm fence wait loop timeout expired
> [  160.900143] kfd: the cp might be in an unrecoverable state due to
> an unsuccessful queues preemption
> [  160.916765] show_signal_msg: 36 callbacks suppressed
> [  160.916771] kfdtest[2498]: segfault at 100007f8a ip
> 00007f8ae932ee5d sp 00007ffc52219cd0 error 4 in
> libhsakmt-1.so.0.0.1[7f8ae932b000+8000]
> [  163.152229] kfd: qcm fence wait loop timeout expired
> [  163.152250] BUG: unable to handle kernel NULL pointer dereference
> at 000000000000005a
> [  163.152299] IP: kfd_get_process_device_data+0x6/0x30 [amdkfd]
> [  163.152323] PGD 2333aa067
> [  163.152323] PUD 230f64067
> [  163.152335] PMD 0
>
> [  163.152364] Oops: 0000 [#1] SMP
> [  163.152379] Modules linked in: joydev edac_mce_amd edac_core
> input_leds kvm_amd snd_hda_codec_realtek kvm irqbypass
> snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel snd_hda_codec
> crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_core
> snd_hwdep pcbc snd_pcm aesni_intel snd_seq_midi snd_seq_midi_event
> snd_rawmidi snd_seq aes_x86_64 crypto_simd snd_seq_device glue_helper
> cryptd snd_timer snd fam15h_power k10temp soundcore i2c_piix4 shpchp
> tpm_infineon mac_hid parport_pc ppdev nfsd auth_rpcgss nfs_acl lockd
> lp grace sunrpc parport autofs4 hid_logitech_hidpp hid_logitech_dj
> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon
> i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect sysimgblt
> libahci fb_sys_fops drm r8169 mii fjes video
> [  163.152668] CPU: 3 PID: 2498 Comm: kfdtest Not tainted 4.10.0-rc5+ #3
> [  163.152695] Hardware name: Gigabyte Technology Co., Ltd. To be
> filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
> [  163.152735] task: ffff995e73d16580 task.stack: ffffb41144458000
> [  163.152764] RIP: 0010:kfd_get_process_device_data+0x6/0x30 [amdkfd]
> [  163.152790] RSP: 0018:ffffb4114445bab0 EFLAGS: 00010246
> [  163.152812] RAX: ffffffffffffffea RBX: ffff995e75909c00 RCX: 0000000000000000
> [  163.152841] RDX: 0000000000000000 RSI: ffffffffffffffea RDI: ffff995e75909600
> [  163.152869] RBP: ffffb4114445bae0 R08: 00000000000252a5 R09: 0000000000000414
> [  163.152898] R10: 0000000000000000 R11: ffffffffb412d38d R12: 00000000ffffffc2
> [  163.152926] R13: 0000000000000000 R14: ffff995e75909ca8 R15: ffff995e75909c00
> [  163.152956] FS:  00007f8ae975e740(0000) GS:ffff995e7ed80000(0000)
> knlGS:0000000000000000
> [  163.152988] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  163.153012] CR2: 000000000000005a CR3: 00000002216ab000 CR4: 00000000000406e0
> [  163.153041] Call Trace:
> [  163.153059]  ? destroy_queues_cpsch+0x166/0x190 [amdkfd]
> [  163.153086]  execute_queues_cpsch+0x2e/0xc0 [amdkfd]
> [  163.153113]  destroy_queue_cpsch+0xbd/0x140 [amdkfd]
> [  163.153139]  pqm_destroy_queue+0x111/0x1d0 [amdkfd]
> [  163.153164]  pqm_uninit+0x3f/0xb0 [amdkfd]
> [  163.153186]  kfd_unbind_process_from_device+0x51/0xd0 [amdkfd]
> [  163.153214]  iommu_pasid_shutdown_callback+0x20/0x30 [amdkfd]
> [  163.153239]  mn_release+0x37/0x70 [amd_iommu_v2]
> [  163.153261]  __mmu_notifier_release+0x44/0xc0
> [  163.153281]  exit_mmap+0x15a/0x170
> [  163.153297]  ? __wake_up+0x44/0x50
> [  163.153314]  ? exit_robust_list+0x5c/0x110
> [  163.153333]  mmput+0x57/0x140
> [  163.153347]  do_exit+0x26b/0xb30
> [  163.153362]  do_group_exit+0x43/0xb0
> [  163.153379]  get_signal+0x293/0x620
> [  163.153396]  do_signal+0x37/0x760
> [  163.153411]  ? print_vma_addr+0x82/0x100
> [  163.153429]  ? vprintk_default+0x29/0x50
> [  163.153447]  ? bad_area+0x46/0x50
> [  163.153463]  ? __do_page_fault+0x3c7/0x4e0
> [  163.153481]  exit_to_usermode_loop+0x76/0xb0
> [  163.153500]  prepare_exit_to_usermode+0x2f/0x40
> [  163.153521]  retint_user+0x8/0x10
> [  163.153536] RIP: 0033:0x7f8ae932ee5d
> [  163.153551] RSP: 002b:00007ffc52219cd0 EFLAGS: 00010202
> [  163.153573] RAX: 0000000000000003 RBX: 0000000100007f8a RCX: 00007ffc52219d00
> [  163.153602] RDX: 00007f8ae9534220 RSI: 00007f8ae8b5eb28 RDI: 0000000100007f8a
> [  163.153630] RBP: 00007ffc52219d20 R08: 0000000001cc1890 R09: 0000000000000000
> [  163.153659] R10: 0000000000000027 R11: 00007f8ae932ee10 R12: 0000000001cc52a0
> [  163.153687] R13: 00007ffc5221a200 R14: 0000000000000021 R15: 0000000000000000
> [  163.153716] Code: e0 04 00 00 48 3b 91 f0 03 00 00 74 01 c3 55 48
> 89 e5 e8 2e f9 ff ff 5d c3 66 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f
> 44 00 00 55 <48> 8b 46 70 48 83 c6 70 48 89 e5 48 39 f0 74 16 48 3b 78
> 10 75
> [  163.153818] RIP: kfd_get_process_device_data+0x6/0x30 [amdkfd] RSP:
> ffffb4114445bab0
> [  163.153848] CR2: 000000000000005a
> [  163.160389] ---[ end trace f6a8177c7119c1f5 ]---
> [  163.160390] Fixing recursive fault but reboot is needed!
>
> On Thu, Feb 9, 2017 at 10:38 PM, Andres Rodriguez <andresx7@gmail.com> wrote:
>> Hey Oded,
>>
>> Sorry to be a nuisance, but if you have everything still setup could you
>> give this fix a quick go?
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 5321d18..9f70ee0 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -667,7 +667,7 @@ static int set_sched_resources(struct
>> device_queue_manager *dqm)
>>                  /* This situation may be hit in the future if a new HW
>>                   * generation exposes more than 64 queues. If so, the
>>                   * definition of res.queue_mask needs updating */
>> -               if (WARN_ON(i > sizeof(res.queue_mask))) {
>> +               if (WARN_ON(i > (sizeof(res.queue_mask)*8))) {
>>                          pr_err("Invalid queue enabled by amdgpu: %d\n", i);
>>                          break;
>>                  }
>>
>> John/Felix,
>>
>> Any chance I could borrow a carrizo/kaveri for a few days? Or maybe you
>> could help me run some final tests on this patch series?
>>
>> - Andres
>>
>>
>>
>> On 2017-02-09 03:11 PM, Oded Gabbay wrote:
>>>    Andres,
>>>
>>> I tried your patches on Kaveri with airlied's drm-next branch.
>>> I used radeon+amdkfd
>>>
>>> The following test failed: KFDQMTest.CreateMultipleCpQueues
>>> However, I can't debug it because I don't have the sources of kfdtest.
>>>
>>> In dmesg, I saw the following warning during boot:
>>> WARNING: CPU: 0 PID: 150 at
>>> drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c:670
>>> start_cpsch+0xc5/0x220 [amdkfd]
>>> [    4.393796] Modules linked in: hid_logitech_hidpp hid_logitech_dj
>>> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon(+)
>>> i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect sysimgblt
>>> libahci fb_sys_fops drm r8169 mii fjes video
>>> [    4.393811] CPU: 0 PID: 150 Comm: systemd-udevd Not tainted 4.10.0-rc5+
>>> #1
>>> [    4.393811] Hardware name: Gigabyte Technology Co., Ltd. To be
>>> filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
>>> [    4.393812] Call Trace:
>>> [    4.393818]  dump_stack+0x63/0x90
>>> [    4.393822]  __warn+0xcb/0xf0
>>> [    4.393823]  warn_slowpath_null+0x1d/0x20
>>> [    4.393830]  start_cpsch+0xc5/0x220 [amdkfd]
>>> [    4.393836]  ? initialize_cpsch+0xa0/0xb0 [amdkfd]
>>> [    4.393841]  kgd2kfd_device_init+0x375/0x490 [amdkfd]
>>> [    4.393883]  radeon_kfd_device_init+0xaf/0xd0 [radeon]
>>> [    4.393911]  radeon_driver_load_kms+0x11e/0x1f0 [radeon]
>>> [    4.393933]  drm_dev_register+0x14a/0x200 [drm]
>>> [    4.393946]  drm_get_pci_dev+0x9d/0x160 [drm]
>>> [    4.393974]  radeon_pci_probe+0xb8/0xe0 [radeon]
>>> [    4.393976]  local_pci_probe+0x45/0xa0
>>> [    4.393978]  pci_device_probe+0x103/0x150
>>> [    4.393981]  driver_probe_device+0x2bf/0x460
>>> [    4.393982]  __driver_attach+0xdf/0xf0
>>> [    4.393984]  ? driver_probe_device+0x460/0x460
>>> [    4.393985]  bus_for_each_dev+0x6c/0xc0
>>> [    4.393987]  driver_attach+0x1e/0x20
>>> [    4.393988]  bus_add_driver+0x1fd/0x270
>>> [    4.393989]  ? 0xffffffffc05c8000
>>> [    4.393991]  driver_register+0x60/0xe0
>>> [    4.393992]  ? 0xffffffffc05c8000
>>> [    4.393993]  __pci_register_driver+0x4c/0x50
>>> [    4.394007]  drm_pci_init+0xeb/0x100 [drm]
>>> [    4.394008]  ? 0xffffffffc05c8000
>>> [    4.394031]  radeon_init+0x98/0xb6 [radeon]
>>> [    4.394034]  do_one_initcall+0x53/0x1a0
>>> [    4.394037]  ? __vunmap+0x81/0xd0
>>> [    4.394039]  ? kmem_cache_alloc_trace+0x152/0x1c0
>>> [    4.394041]  ? vfree+0x2e/0x70
>>> [    4.394044]  do_init_module+0x5f/0x1ff
>>> [    4.394046]  load_module+0x24cc/0x29f0
>>> [    4.394047]  ? __symbol_put+0x60/0x60
>>> [    4.394050]  ? security_kernel_post_read_file+0x6b/0x80
>>> [    4.394052]  SYSC_finit_module+0xdf/0x110
>>> [    4.394054]  SyS_finit_module+0xe/0x10
>>> [    4.394056]  entry_SYSCALL_64_fastpath+0x1e/0xad
>>> [    4.394058] RIP: 0033:0x7f9cda77c8e9
>>> [    4.394059] RSP: 002b:00007ffe195d3378 EFLAGS: 00000246 ORIG_RAX:
>>> 0000000000000139
>>> [    4.394060] RAX: ffffffffffffffda RBX: 00007f9cdb8dda7e RCX:
>>> 00007f9cda77c8e9
>>> [    4.394061] RDX: 0000000000000000 RSI: 00007f9cdac7ce2a RDI:
>>> 0000000000000013
>>> [    4.394062] RBP: 00007ffe195d2450 R08: 0000000000000000 R09:
>>> 0000000000000000
>>> [    4.394063] R10: 0000000000000013 R11: 0000000000000246 R12:
>>> 00007ffe195d245a
>>> [    4.394063] R13: 00007ffe195d1378 R14: 0000563f70cc93b0 R15:
>>> 0000563f70cba4d0
>>> [    4.394091] ---[ end trace 9c5af17304d998bb ]---
>>> [    4.394092] Invalid queue enabled by amdgpu: 9
>>>
>>> I suggest you get a Kaveri/Carrizo machine to debug these issues.
>>>
>>> Until that, I don't think we should merge this patch-set.
>>>
>>> Oded
>>>
>>> On Wed, Feb 8, 2017 at 9:47 PM, Andres Rodriguez <andresx7@gmail.com>
>>> wrote:
>>>> Thank you Oded.
>>>>
>>>> - Andres
>>>>
>>>>
>>>> On 2017-02-08 02:32 PM, Oded Gabbay wrote:
>>>>> On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez <andresx7@gmail.com>
>>>>> wrote:
>>>>>> Hey Felix,
>>>>>>
>>>>>> Thanks for the pointer to the ROCm mqd commit. I like that the
>>>>>> workarounds
>>>>>> are easy to spot. I'll add that to a new patch series I'm working on
>>>>>> for
>>>>>> some bug-fixes for perf being lower on pipes other than pipe 0.
>>>>>>
>>>>>> I haven't tested this yet on kaveri/carrizo. I'm hoping someone with
>>>>>> the
>>>>>> HW
>>>>>> will be able to give it a go. I put in a few small hacks to get KFD to
>>>>>> boot
>>>>>> but do nothing on polaris10.
>>>>>>
>>>>>> Regards,
>>>>>> Andres
>>>>>>
>>>>>>
>>>>>> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
>>>>>>> Hi Andres,
>>>>>>>
>>>>>>> Thank you for tackling this task. It's more involved than I expected,
>>>>>>> mostly because I didn't have much awareness of the MQD management in
>>>>>>> amdgpu.
>>>>>>>
>>>>>>> I made one comment in a separate message about the unified MQD commit
>>>>>>> function, if you want to bring that more in line with our latest ROCm
>>>>>>> release on github.
>>>>>>>
>>>>>>> Also, were you able to test the upstream KFD with your changes on a
>>>>>>> Kaveri or Carrizo?
>>>>>>>
>>>>>>> Regards,
>>>>>>>      Felix
>>>>>>>
>>>>>>>
>>>>>>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>>>>>>>> The current queue/pipe split policy is for amdgpu to take the first
>>>>>>>> pipe
>>>>>>>> of
>>>>>>>> MEC0 and leave the rest for amdkfd to use. This policy is taken as an
>>>>>>>> assumption in a few areas of the implementation.
>>>>>>>>
>>>>>>>> This patch series aims to allow for flexible/tunable queue/pipe split
>>>>>>>> policies
>>>>>>>> between kgd and kfd. It also updates the queue/pipe split policy to
>>>>>>>> one
>>>>>>>> that
>>>>>>>> allows better compute app concurrency for both drivers.
>>>>>>>>
>>>>>>>> In the process some duplicate code and hardcoded constants were
>>>>>>>> removed.
>>>>>>>>
>>>>>>>> Any suggestions or feedback on improvements welcome.
>>>>>>>>
>>>>>> _______________________________________________
>>>>>> amd-gfx mailing list
>>>>>> amd-gfx@lists.freedesktop.org
>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>> Hi Andres,
>>>>> I will try to find some time to test it on my Kaveri machine.
>>>>>
>>>>> Oded
>>>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]                                 ` <37bd1bf7-4db6-6004-f61d-5084efd1ec83-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-02-10 17:56                                   ` Oded Gabbay
       [not found]                                     ` <CAFCwf125SHM52z2UAp_Y0rRQe9UHpeWMDDpBkf1csPJyAgXTeA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Oded Gabbay @ 2017-02-10 17:56 UTC (permalink / raw)
  To: Andres Rodriguez
  Cc: Alex Deucher, Jay Cornwall, Felix Kuehling,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, John Bridgman

I don't have a repo, nor do I have the source code.
It is a tool that we developed inside AMD (when I was working there),
and after I left AMD I got permission to use the binary for
regressions testing.

Oded

On Fri, Feb 10, 2017 at 6:33 PM, Andres Rodriguez <andresx7@gmail.com> wrote:
> Hey Oded,
>
> Where can I find a repo with kfdtest?
>
> I tried looking here but couldn't find it:
>
> https://cgit.freedesktop.org/~gabbayo/
>
> -Andres
>
>
>
> On 2017-02-10 05:35 AM, Oded Gabbay wrote:
>>
>> So the warning in dmesg is gone of course, but the test (that I
>> mentioned in previous email) still fails, and this time it caused the
>> kernel to crash. In addition, now other tests fail as well, e.g.
>> KFDEventTest.SignalEvent
>>
>> I honestly suggest to take some time to debug this patch-set on an
>> actual Kaveri machine and then re-send the patches.
>>
>> Thanks,
>> Oded
>>
>> log of crash from KFDQMTest.CreateMultipleCpQueues:
>>
>> [  160.900137] kfd: qcm fence wait loop timeout expired
>> [  160.900143] kfd: the cp might be in an unrecoverable state due to
>> an unsuccessful queues preemption
>> [  160.916765] show_signal_msg: 36 callbacks suppressed
>> [  160.916771] kfdtest[2498]: segfault at 100007f8a ip
>> 00007f8ae932ee5d sp 00007ffc52219cd0 error 4 in
>> libhsakmt-1.so.0.0.1[7f8ae932b000+8000]
>> [  163.152229] kfd: qcm fence wait loop timeout expired
>> [  163.152250] BUG: unable to handle kernel NULL pointer dereference
>> at 000000000000005a
>> [  163.152299] IP: kfd_get_process_device_data+0x6/0x30 [amdkfd]
>> [  163.152323] PGD 2333aa067
>> [  163.152323] PUD 230f64067
>> [  163.152335] PMD 0
>>
>> [  163.152364] Oops: 0000 [#1] SMP
>> [  163.152379] Modules linked in: joydev edac_mce_amd edac_core
>> input_leds kvm_amd snd_hda_codec_realtek kvm irqbypass
>> snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel snd_hda_codec
>> crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_core
>> snd_hwdep pcbc snd_pcm aesni_intel snd_seq_midi snd_seq_midi_event
>> snd_rawmidi snd_seq aes_x86_64 crypto_simd snd_seq_device glue_helper
>> cryptd snd_timer snd fam15h_power k10temp soundcore i2c_piix4 shpchp
>> tpm_infineon mac_hid parport_pc ppdev nfsd auth_rpcgss nfs_acl lockd
>> lp grace sunrpc parport autofs4 hid_logitech_hidpp hid_logitech_dj
>> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon
>> i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect sysimgblt
>> libahci fb_sys_fops drm r8169 mii fjes video
>> [  163.152668] CPU: 3 PID: 2498 Comm: kfdtest Not tainted 4.10.0-rc5+ #3
>> [  163.152695] Hardware name: Gigabyte Technology Co., Ltd. To be
>> filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
>> [  163.152735] task: ffff995e73d16580 task.stack: ffffb41144458000
>> [  163.152764] RIP: 0010:kfd_get_process_device_data+0x6/0x30 [amdkfd]
>> [  163.152790] RSP: 0018:ffffb4114445bab0 EFLAGS: 00010246
>> [  163.152812] RAX: ffffffffffffffea RBX: ffff995e75909c00 RCX:
>> 0000000000000000
>> [  163.152841] RDX: 0000000000000000 RSI: ffffffffffffffea RDI:
>> ffff995e75909600
>> [  163.152869] RBP: ffffb4114445bae0 R08: 00000000000252a5 R09:
>> 0000000000000414
>> [  163.152898] R10: 0000000000000000 R11: ffffffffb412d38d R12:
>> 00000000ffffffc2
>> [  163.152926] R13: 0000000000000000 R14: ffff995e75909ca8 R15:
>> ffff995e75909c00
>> [  163.152956] FS:  00007f8ae975e740(0000) GS:ffff995e7ed80000(0000)
>> knlGS:0000000000000000
>> [  163.152988] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [  163.153012] CR2: 000000000000005a CR3: 00000002216ab000 CR4:
>> 00000000000406e0
>> [  163.153041] Call Trace:
>> [  163.153059]  ? destroy_queues_cpsch+0x166/0x190 [amdkfd]
>> [  163.153086]  execute_queues_cpsch+0x2e/0xc0 [amdkfd]
>> [  163.153113]  destroy_queue_cpsch+0xbd/0x140 [amdkfd]
>> [  163.153139]  pqm_destroy_queue+0x111/0x1d0 [amdkfd]
>> [  163.153164]  pqm_uninit+0x3f/0xb0 [amdkfd]
>> [  163.153186]  kfd_unbind_process_from_device+0x51/0xd0 [amdkfd]
>> [  163.153214]  iommu_pasid_shutdown_callback+0x20/0x30 [amdkfd]
>> [  163.153239]  mn_release+0x37/0x70 [amd_iommu_v2]
>> [  163.153261]  __mmu_notifier_release+0x44/0xc0
>> [  163.153281]  exit_mmap+0x15a/0x170
>> [  163.153297]  ? __wake_up+0x44/0x50
>> [  163.153314]  ? exit_robust_list+0x5c/0x110
>> [  163.153333]  mmput+0x57/0x140
>> [  163.153347]  do_exit+0x26b/0xb30
>> [  163.153362]  do_group_exit+0x43/0xb0
>> [  163.153379]  get_signal+0x293/0x620
>> [  163.153396]  do_signal+0x37/0x760
>> [  163.153411]  ? print_vma_addr+0x82/0x100
>> [  163.153429]  ? vprintk_default+0x29/0x50
>> [  163.153447]  ? bad_area+0x46/0x50
>> [  163.153463]  ? __do_page_fault+0x3c7/0x4e0
>> [  163.153481]  exit_to_usermode_loop+0x76/0xb0
>> [  163.153500]  prepare_exit_to_usermode+0x2f/0x40
>> [  163.153521]  retint_user+0x8/0x10
>> [  163.153536] RIP: 0033:0x7f8ae932ee5d
>> [  163.153551] RSP: 002b:00007ffc52219cd0 EFLAGS: 00010202
>> [  163.153573] RAX: 0000000000000003 RBX: 0000000100007f8a RCX:
>> 00007ffc52219d00
>> [  163.153602] RDX: 00007f8ae9534220 RSI: 00007f8ae8b5eb28 RDI:
>> 0000000100007f8a
>> [  163.153630] RBP: 00007ffc52219d20 R08: 0000000001cc1890 R09:
>> 0000000000000000
>> [  163.153659] R10: 0000000000000027 R11: 00007f8ae932ee10 R12:
>> 0000000001cc52a0
>> [  163.153687] R13: 00007ffc5221a200 R14: 0000000000000021 R15:
>> 0000000000000000
>> [  163.153716] Code: e0 04 00 00 48 3b 91 f0 03 00 00 74 01 c3 55 48
>> 89 e5 e8 2e f9 ff ff 5d c3 66 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f
>> 44 00 00 55 <48> 8b 46 70 48 83 c6 70 48 89 e5 48 39 f0 74 16 48 3b 78
>> 10 75
>> [  163.153818] RIP: kfd_get_process_device_data+0x6/0x30 [amdkfd] RSP:
>> ffffb4114445bab0
>> [  163.153848] CR2: 000000000000005a
>> [  163.160389] ---[ end trace f6a8177c7119c1f5 ]---
>> [  163.160390] Fixing recursive fault but reboot is needed!
>>
>> On Thu, Feb 9, 2017 at 10:38 PM, Andres Rodriguez <andresx7@gmail.com>
>> wrote:
>>>
>>> Hey Oded,
>>>
>>> Sorry to be a nuisance, but if you have everything still setup could you
>>> give this fix a quick go?
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> index 5321d18..9f70ee0 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> @@ -667,7 +667,7 @@ static int set_sched_resources(struct
>>> device_queue_manager *dqm)
>>>                  /* This situation may be hit in the future if a new HW
>>>                   * generation exposes more than 64 queues. If so, the
>>>                   * definition of res.queue_mask needs updating */
>>> -               if (WARN_ON(i > sizeof(res.queue_mask))) {
>>> +               if (WARN_ON(i > (sizeof(res.queue_mask)*8))) {
>>>                          pr_err("Invalid queue enabled by amdgpu: %d\n",
>>> i);
>>>                          break;
>>>                  }
>>>
>>> John/Felix,
>>>
>>> Any chance I could borrow a carrizo/kaveri for a few days? Or maybe you
>>> could help me run some final tests on this patch series?
>>>
>>> - Andres
>>>
>>>
>>>
>>> On 2017-02-09 03:11 PM, Oded Gabbay wrote:
>>>>
>>>>    Andres,
>>>>
>>>> I tried your patches on Kaveri with airlied's drm-next branch.
>>>> I used radeon+amdkfd
>>>>
>>>> The following test failed: KFDQMTest.CreateMultipleCpQueues
>>>> However, I can't debug it because I don't have the sources of kfdtest.
>>>>
>>>> In dmesg, I saw the following warning during boot:
>>>> WARNING: CPU: 0 PID: 150 at
>>>> drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c:670
>>>> start_cpsch+0xc5/0x220 [amdkfd]
>>>> [    4.393796] Modules linked in: hid_logitech_hidpp hid_logitech_dj
>>>> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon(+)
>>>> i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect sysimgblt
>>>> libahci fb_sys_fops drm r8169 mii fjes video
>>>> [    4.393811] CPU: 0 PID: 150 Comm: systemd-udevd Not tainted
>>>> 4.10.0-rc5+
>>>> #1
>>>> [    4.393811] Hardware name: Gigabyte Technology Co., Ltd. To be
>>>> filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
>>>> [    4.393812] Call Trace:
>>>> [    4.393818]  dump_stack+0x63/0x90
>>>> [    4.393822]  __warn+0xcb/0xf0
>>>> [    4.393823]  warn_slowpath_null+0x1d/0x20
>>>> [    4.393830]  start_cpsch+0xc5/0x220 [amdkfd]
>>>> [    4.393836]  ? initialize_cpsch+0xa0/0xb0 [amdkfd]
>>>> [    4.393841]  kgd2kfd_device_init+0x375/0x490 [amdkfd]
>>>> [    4.393883]  radeon_kfd_device_init+0xaf/0xd0 [radeon]
>>>> [    4.393911]  radeon_driver_load_kms+0x11e/0x1f0 [radeon]
>>>> [    4.393933]  drm_dev_register+0x14a/0x200 [drm]
>>>> [    4.393946]  drm_get_pci_dev+0x9d/0x160 [drm]
>>>> [    4.393974]  radeon_pci_probe+0xb8/0xe0 [radeon]
>>>> [    4.393976]  local_pci_probe+0x45/0xa0
>>>> [    4.393978]  pci_device_probe+0x103/0x150
>>>> [    4.393981]  driver_probe_device+0x2bf/0x460
>>>> [    4.393982]  __driver_attach+0xdf/0xf0
>>>> [    4.393984]  ? driver_probe_device+0x460/0x460
>>>> [    4.393985]  bus_for_each_dev+0x6c/0xc0
>>>> [    4.393987]  driver_attach+0x1e/0x20
>>>> [    4.393988]  bus_add_driver+0x1fd/0x270
>>>> [    4.393989]  ? 0xffffffffc05c8000
>>>> [    4.393991]  driver_register+0x60/0xe0
>>>> [    4.393992]  ? 0xffffffffc05c8000
>>>> [    4.393993]  __pci_register_driver+0x4c/0x50
>>>> [    4.394007]  drm_pci_init+0xeb/0x100 [drm]
>>>> [    4.394008]  ? 0xffffffffc05c8000
>>>> [    4.394031]  radeon_init+0x98/0xb6 [radeon]
>>>> [    4.394034]  do_one_initcall+0x53/0x1a0
>>>> [    4.394037]  ? __vunmap+0x81/0xd0
>>>> [    4.394039]  ? kmem_cache_alloc_trace+0x152/0x1c0
>>>> [    4.394041]  ? vfree+0x2e/0x70
>>>> [    4.394044]  do_init_module+0x5f/0x1ff
>>>> [    4.394046]  load_module+0x24cc/0x29f0
>>>> [    4.394047]  ? __symbol_put+0x60/0x60
>>>> [    4.394050]  ? security_kernel_post_read_file+0x6b/0x80
>>>> [    4.394052]  SYSC_finit_module+0xdf/0x110
>>>> [    4.394054]  SyS_finit_module+0xe/0x10
>>>> [    4.394056]  entry_SYSCALL_64_fastpath+0x1e/0xad
>>>> [    4.394058] RIP: 0033:0x7f9cda77c8e9
>>>> [    4.394059] RSP: 002b:00007ffe195d3378 EFLAGS: 00000246 ORIG_RAX:
>>>> 0000000000000139
>>>> [    4.394060] RAX: ffffffffffffffda RBX: 00007f9cdb8dda7e RCX:
>>>> 00007f9cda77c8e9
>>>> [    4.394061] RDX: 0000000000000000 RSI: 00007f9cdac7ce2a RDI:
>>>> 0000000000000013
>>>> [    4.394062] RBP: 00007ffe195d2450 R08: 0000000000000000 R09:
>>>> 0000000000000000
>>>> [    4.394063] R10: 0000000000000013 R11: 0000000000000246 R12:
>>>> 00007ffe195d245a
>>>> [    4.394063] R13: 00007ffe195d1378 R14: 0000563f70cc93b0 R15:
>>>> 0000563f70cba4d0
>>>> [    4.394091] ---[ end trace 9c5af17304d998bb ]---
>>>> [    4.394092] Invalid queue enabled by amdgpu: 9
>>>>
>>>> I suggest you get a Kaveri/Carrizo machine to debug these issues.
>>>>
>>>> Until that, I don't think we should merge this patch-set.
>>>>
>>>> Oded
>>>>
>>>> On Wed, Feb 8, 2017 at 9:47 PM, Andres Rodriguez <andresx7@gmail.com>
>>>> wrote:
>>>>>
>>>>> Thank you Oded.
>>>>>
>>>>> - Andres
>>>>>
>>>>>
>>>>> On 2017-02-08 02:32 PM, Oded Gabbay wrote:
>>>>>>
>>>>>> On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez <andresx7@gmail.com>
>>>>>> wrote:
>>>>>>>
>>>>>>> Hey Felix,
>>>>>>>
>>>>>>> Thanks for the pointer to the ROCm mqd commit. I like that the
>>>>>>> workarounds
>>>>>>> are easy to spot. I'll add that to a new patch series I'm working on
>>>>>>> for
>>>>>>> some bug-fixes for perf being lower on pipes other than pipe 0.
>>>>>>>
>>>>>>> I haven't tested this yet on kaveri/carrizo. I'm hoping someone with
>>>>>>> the
>>>>>>> HW
>>>>>>> will be able to give it a go. I put in a few small hacks to get KFD
>>>>>>> to
>>>>>>> boot
>>>>>>> but do nothing on polaris10.
>>>>>>>
>>>>>>> Regards,
>>>>>>> Andres
>>>>>>>
>>>>>>>
>>>>>>> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
>>>>>>>>
>>>>>>>> Hi Andres,
>>>>>>>>
>>>>>>>> Thank you for tackling this task. It's more involved than I
>>>>>>>> expected,
>>>>>>>> mostly because I didn't have much awareness of the MQD management in
>>>>>>>> amdgpu.
>>>>>>>>
>>>>>>>> I made one comment in a separate message about the unified MQD
>>>>>>>> commit
>>>>>>>> function, if you want to bring that more in line with our latest
>>>>>>>> ROCm
>>>>>>>> release on github.
>>>>>>>>
>>>>>>>> Also, were you able to test the upstream KFD with your changes on a
>>>>>>>> Kaveri or Carrizo?
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>>      Felix
>>>>>>>>
>>>>>>>>
>>>>>>>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>>>>>>>>>
>>>>>>>>> The current queue/pipe split policy is for amdgpu to take the first
>>>>>>>>> pipe
>>>>>>>>> of
>>>>>>>>> MEC0 and leave the rest for amdkfd to use. This policy is taken as
>>>>>>>>> an
>>>>>>>>> assumption in a few areas of the implementation.
>>>>>>>>>
>>>>>>>>> This patch series aims to allow for flexible/tunable queue/pipe
>>>>>>>>> split
>>>>>>>>> policies
>>>>>>>>> between kgd and kfd. It also updates the queue/pipe split policy to
>>>>>>>>> one
>>>>>>>>> that
>>>>>>>>> allows better compute app concurrency for both drivers.
>>>>>>>>>
>>>>>>>>> In the process some duplicate code and hardcoded constants were
>>>>>>>>> removed.
>>>>>>>>>
>>>>>>>>> Any suggestions or feedback on improvements welcome.
>>>>>>>>>
>>>>>>> _______________________________________________
>>>>>>> amd-gfx mailing list
>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>
>>>>>> Hi Andres,
>>>>>> I will try to find some time to test it on my Kaveri machine.
>>>>>>
>>>>>> Oded
>>>>>
>>>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* RE: Change queue/pipe split between amdkfd and amdgpu
       [not found]                                     ` <CAFCwf125SHM52z2UAp_Y0rRQe9UHpeWMDDpBkf1csPJyAgXTeA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-02-16  4:00                                       ` Bridgman, John
       [not found]                                         ` <BN6PR12MB1348C4F31B81516D5EAAFCA3E85A0-/b2+HYfkarQX0pEhCR5T8QdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 31+ messages in thread
From: Bridgman, John @ 2017-02-16  4:00 UTC (permalink / raw)
  To: Oded Gabbay, Andres Rodriguez
  Cc: Deucher, Alexander, Jay Cornwall, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Any objections to authorizing Oded to post the kfdtest binary he is using to some public place (if not there already) so others (like Andres) can test changes which touch on amdkfd ? 

We should check it for embarrassing symbols but otherwise it should be OK.

That said, since we are getting perilously close to actually sending dGPU support changes upstream we will need (IMO) to maintain a sanitized source repo for kfdtest as well... sharing the binary just gets us started.

Thanks,
John

>-----Original Message-----
>From: Oded Gabbay [mailto:oded.gabbay@gmail.com]
>Sent: Friday, February 10, 2017 12:57 PM
>To: Andres Rodriguez
>Cc: Kuehling, Felix; Bridgman, John; amd-gfx@lists.freedesktop.org;
>Deucher, Alexander; Jay Cornwall
>Subject: Re: Change queue/pipe split between amdkfd and amdgpu
>
>I don't have a repo, nor do I have the source code.
>It is a tool that we developed inside AMD (when I was working there), and
>after I left AMD I got permission to use the binary for regressions testing.
>
>Oded
>
>On Fri, Feb 10, 2017 at 6:33 PM, Andres Rodriguez <andresx7@gmail.com>
>wrote:
>> Hey Oded,
>>
>> Where can I find a repo with kfdtest?
>>
>> I tried looking here but couldn't find it:
>>
>> https://cgit.freedesktop.org/~gabbayo/
>>
>> -Andres
>>
>>
>>
>> On 2017-02-10 05:35 AM, Oded Gabbay wrote:
>>>
>>> So the warning in dmesg is gone of course, but the test (that I
>>> mentioned in previous email) still fails, and this time it caused the
>>> kernel to crash. In addition, now other tests fail as well, e.g.
>>> KFDEventTest.SignalEvent
>>>
>>> I honestly suggest to take some time to debug this patch-set on an
>>> actual Kaveri machine and then re-send the patches.
>>>
>>> Thanks,
>>> Oded
>>>
>>> log of crash from KFDQMTest.CreateMultipleCpQueues:
>>>
>>> [  160.900137] kfd: qcm fence wait loop timeout expired [
>>> 160.900143] kfd: the cp might be in an unrecoverable state due to an
>>> unsuccessful queues preemption [  160.916765] show_signal_msg: 36
>>> callbacks suppressed [  160.916771] kfdtest[2498]: segfault at
>>> 100007f8a ip 00007f8ae932ee5d sp 00007ffc52219cd0 error 4 in
>>> libhsakmt-1.so.0.0.1[7f8ae932b000+8000]
>>> [  163.152229] kfd: qcm fence wait loop timeout expired [
>>> 163.152250] BUG: unable to handle kernel NULL pointer dereference at
>>> 000000000000005a [  163.152299] IP:
>>> kfd_get_process_device_data+0x6/0x30 [amdkfd] [  163.152323] PGD
>>> 2333aa067 [  163.152323] PUD 230f64067 [  163.152335] PMD 0
>>>
>>> [  163.152364] Oops: 0000 [#1] SMP
>>> [  163.152379] Modules linked in: joydev edac_mce_amd edac_core
>>> input_leds kvm_amd snd_hda_codec_realtek kvm irqbypass
>>> snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel
>snd_hda_codec
>>> crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_core
>>> snd_hwdep pcbc snd_pcm aesni_intel snd_seq_midi snd_seq_midi_event
>>> snd_rawmidi snd_seq aes_x86_64 crypto_simd snd_seq_device
>glue_helper
>>> cryptd snd_timer snd fam15h_power k10temp soundcore i2c_piix4 shpchp
>>> tpm_infineon mac_hid parport_pc ppdev nfsd auth_rpcgss nfs_acl lockd
>>> lp grace sunrpc parport autofs4 hid_logitech_hidpp hid_logitech_dj
>>> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon
>>> i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect
>>> sysimgblt libahci fb_sys_fops drm r8169 mii fjes video [  163.152668]
>>> CPU: 3 PID: 2498 Comm: kfdtest Not tainted 4.10.0-rc5+ #3 [
>>> 163.152695] Hardware name: Gigabyte Technology Co., Ltd. To be filled
>>> by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014 [  163.152735] task:
>>> ffff995e73d16580 task.stack: ffffb41144458000 [  163.152764] RIP:
>>> 0010:kfd_get_process_device_data+0x6/0x30 [amdkfd] [  163.152790]
>>> RSP: 0018:ffffb4114445bab0 EFLAGS: 00010246 [  163.152812] RAX:
>>> ffffffffffffffea RBX: ffff995e75909c00 RCX:
>>> 0000000000000000
>>> [  163.152841] RDX: 0000000000000000 RSI: ffffffffffffffea RDI:
>>> ffff995e75909600
>>> [  163.152869] RBP: ffffb4114445bae0 R08: 00000000000252a5 R09:
>>> 0000000000000414
>>> [  163.152898] R10: 0000000000000000 R11: ffffffffb412d38d R12:
>>> 00000000ffffffc2
>>> [  163.152926] R13: 0000000000000000 R14: ffff995e75909ca8 R15:
>>> ffff995e75909c00
>>> [  163.152956] FS:  00007f8ae975e740(0000) GS:ffff995e7ed80000(0000)
>>> knlGS:0000000000000000
>>> [  163.152988] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [
>>> 163.153012] CR2: 000000000000005a CR3: 00000002216ab000 CR4:
>>> 00000000000406e0
>>> [  163.153041] Call Trace:
>>> [  163.153059]  ? destroy_queues_cpsch+0x166/0x190 [amdkfd] [
>>> 163.153086]  execute_queues_cpsch+0x2e/0xc0 [amdkfd] [  163.153113]
>>> destroy_queue_cpsch+0xbd/0x140 [amdkfd] [  163.153139]
>>> pqm_destroy_queue+0x111/0x1d0 [amdkfd] [  163.153164]
>>> pqm_uninit+0x3f/0xb0 [amdkfd] [  163.153186]
>>> kfd_unbind_process_from_device+0x51/0xd0 [amdkfd] [  163.153214]
>>> iommu_pasid_shutdown_callback+0x20/0x30 [amdkfd] [  163.153239]
>>> mn_release+0x37/0x70 [amd_iommu_v2] [  163.153261]
>>> __mmu_notifier_release+0x44/0xc0 [  163.153281]
>>> exit_mmap+0x15a/0x170 [  163.153297]  ? __wake_up+0x44/0x50 [
>>> 163.153314]  ? exit_robust_list+0x5c/0x110 [  163.153333]
>>> mmput+0x57/0x140 [  163.153347]  do_exit+0x26b/0xb30 [  163.153362]
>>> do_group_exit+0x43/0xb0 [  163.153379]  get_signal+0x293/0x620 [
>>> 163.153396]  do_signal+0x37/0x760 [  163.153411]  ?
>>> print_vma_addr+0x82/0x100 [  163.153429]  ? vprintk_default+0x29/0x50
>>> [  163.153447]  ? bad_area+0x46/0x50 [  163.153463]  ?
>>> __do_page_fault+0x3c7/0x4e0 [  163.153481]
>>> exit_to_usermode_loop+0x76/0xb0 [  163.153500]
>>> prepare_exit_to_usermode+0x2f/0x40
>>> [  163.153521]  retint_user+0x8/0x10
>>> [  163.153536] RIP: 0033:0x7f8ae932ee5d [  163.153551] RSP:
>>> 002b:00007ffc52219cd0 EFLAGS: 00010202 [  163.153573] RAX:
>>> 0000000000000003 RBX: 0000000100007f8a RCX:
>>> 00007ffc52219d00
>>> [  163.153602] RDX: 00007f8ae9534220 RSI: 00007f8ae8b5eb28 RDI:
>>> 0000000100007f8a
>>> [  163.153630] RBP: 00007ffc52219d20 R08: 0000000001cc1890 R09:
>>> 0000000000000000
>>> [  163.153659] R10: 0000000000000027 R11: 00007f8ae932ee10 R12:
>>> 0000000001cc52a0
>>> [  163.153687] R13: 00007ffc5221a200 R14: 0000000000000021 R15:
>>> 0000000000000000
>>> [  163.153716] Code: e0 04 00 00 48 3b 91 f0 03 00 00 74 01 c3 55 48
>>> 89 e5 e8 2e f9 ff ff 5d c3 66 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f
>>> 44 00 00 55 <48> 8b 46 70 48 83 c6 70 48 89 e5 48 39 f0 74 16 48 3b
>>> 78
>>> 10 75
>>> [  163.153818] RIP: kfd_get_process_device_data+0x6/0x30 [amdkfd] RSP:
>>> ffffb4114445bab0
>>> [  163.153848] CR2: 000000000000005a
>>> [  163.160389] ---[ end trace f6a8177c7119c1f5 ]--- [  163.160390]
>>> Fixing recursive fault but reboot is needed!
>>>
>>> On Thu, Feb 9, 2017 at 10:38 PM, Andres Rodriguez
>>> <andresx7@gmail.com>
>>> wrote:
>>>>
>>>> Hey Oded,
>>>>
>>>> Sorry to be a nuisance, but if you have everything still setup could
>>>> you give this fix a quick go?
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>>> index 5321d18..9f70ee0 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>>> @@ -667,7 +667,7 @@ static int set_sched_resources(struct
>>>> device_queue_manager *dqm)
>>>>                  /* This situation may be hit in the future if a new HW
>>>>                   * generation exposes more than 64 queues. If so, the
>>>>                   * definition of res.queue_mask needs updating */
>>>> -               if (WARN_ON(i > sizeof(res.queue_mask))) {
>>>> +               if (WARN_ON(i > (sizeof(res.queue_mask)*8))) {
>>>>                          pr_err("Invalid queue enabled by amdgpu:
>>>> %d\n", i);
>>>>                          break;
>>>>                  }
>>>>
>>>> John/Felix,
>>>>
>>>> Any chance I could borrow a carrizo/kaveri for a few days? Or maybe
>>>> you could help me run some final tests on this patch series?
>>>>
>>>> - Andres
>>>>
>>>>
>>>>
>>>> On 2017-02-09 03:11 PM, Oded Gabbay wrote:
>>>>>
>>>>>    Andres,
>>>>>
>>>>> I tried your patches on Kaveri with airlied's drm-next branch.
>>>>> I used radeon+amdkfd
>>>>>
>>>>> The following test failed: KFDQMTest.CreateMultipleCpQueues
>>>>> However, I can't debug it because I don't have the sources of kfdtest.
>>>>>
>>>>> In dmesg, I saw the following warning during boot:
>>>>> WARNING: CPU: 0 PID: 150 at
>>>>> drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c:670
>>>>> start_cpsch+0xc5/0x220 [amdkfd]
>>>>> [    4.393796] Modules linked in: hid_logitech_hidpp hid_logitech_dj
>>>>> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2
>>>>> radeon(+) i2c_algo_bit ttm drm_kms_helper syscopyarea ahci
>>>>> sysfillrect sysimgblt libahci fb_sys_fops drm r8169 mii fjes video
>>>>> [    4.393811] CPU: 0 PID: 150 Comm: systemd-udevd Not tainted
>>>>> 4.10.0-rc5+
>>>>> #1
>>>>> [    4.393811] Hardware name: Gigabyte Technology Co., Ltd. To be
>>>>> filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
>>>>> [    4.393812] Call Trace:
>>>>> [    4.393818]  dump_stack+0x63/0x90
>>>>> [    4.393822]  __warn+0xcb/0xf0
>>>>> [    4.393823]  warn_slowpath_null+0x1d/0x20
>>>>> [    4.393830]  start_cpsch+0xc5/0x220 [amdkfd]
>>>>> [    4.393836]  ? initialize_cpsch+0xa0/0xb0 [amdkfd]
>>>>> [    4.393841]  kgd2kfd_device_init+0x375/0x490 [amdkfd]
>>>>> [    4.393883]  radeon_kfd_device_init+0xaf/0xd0 [radeon]
>>>>> [    4.393911]  radeon_driver_load_kms+0x11e/0x1f0 [radeon]
>>>>> [    4.393933]  drm_dev_register+0x14a/0x200 [drm]
>>>>> [    4.393946]  drm_get_pci_dev+0x9d/0x160 [drm]
>>>>> [    4.393974]  radeon_pci_probe+0xb8/0xe0 [radeon]
>>>>> [    4.393976]  local_pci_probe+0x45/0xa0
>>>>> [    4.393978]  pci_device_probe+0x103/0x150
>>>>> [    4.393981]  driver_probe_device+0x2bf/0x460
>>>>> [    4.393982]  __driver_attach+0xdf/0xf0
>>>>> [    4.393984]  ? driver_probe_device+0x460/0x460
>>>>> [    4.393985]  bus_for_each_dev+0x6c/0xc0
>>>>> [    4.393987]  driver_attach+0x1e/0x20
>>>>> [    4.393988]  bus_add_driver+0x1fd/0x270
>>>>> [    4.393989]  ? 0xffffffffc05c8000
>>>>> [    4.393991]  driver_register+0x60/0xe0
>>>>> [    4.393992]  ? 0xffffffffc05c8000
>>>>> [    4.393993]  __pci_register_driver+0x4c/0x50
>>>>> [    4.394007]  drm_pci_init+0xeb/0x100 [drm]
>>>>> [    4.394008]  ? 0xffffffffc05c8000
>>>>> [    4.394031]  radeon_init+0x98/0xb6 [radeon]
>>>>> [    4.394034]  do_one_initcall+0x53/0x1a0
>>>>> [    4.394037]  ? __vunmap+0x81/0xd0
>>>>> [    4.394039]  ? kmem_cache_alloc_trace+0x152/0x1c0
>>>>> [    4.394041]  ? vfree+0x2e/0x70
>>>>> [    4.394044]  do_init_module+0x5f/0x1ff
>>>>> [    4.394046]  load_module+0x24cc/0x29f0
>>>>> [    4.394047]  ? __symbol_put+0x60/0x60
>>>>> [    4.394050]  ? security_kernel_post_read_file+0x6b/0x80
>>>>> [    4.394052]  SYSC_finit_module+0xdf/0x110
>>>>> [    4.394054]  SyS_finit_module+0xe/0x10
>>>>> [    4.394056]  entry_SYSCALL_64_fastpath+0x1e/0xad
>>>>> [    4.394058] RIP: 0033:0x7f9cda77c8e9
>>>>> [    4.394059] RSP: 002b:00007ffe195d3378 EFLAGS: 00000246 ORIG_RAX:
>>>>> 0000000000000139
>>>>> [    4.394060] RAX: ffffffffffffffda RBX: 00007f9cdb8dda7e RCX:
>>>>> 00007f9cda77c8e9
>>>>> [    4.394061] RDX: 0000000000000000 RSI: 00007f9cdac7ce2a RDI:
>>>>> 0000000000000013
>>>>> [    4.394062] RBP: 00007ffe195d2450 R08: 0000000000000000 R09:
>>>>> 0000000000000000
>>>>> [    4.394063] R10: 0000000000000013 R11: 0000000000000246 R12:
>>>>> 00007ffe195d245a
>>>>> [    4.394063] R13: 00007ffe195d1378 R14: 0000563f70cc93b0 R15:
>>>>> 0000563f70cba4d0
>>>>> [    4.394091] ---[ end trace 9c5af17304d998bb ]---
>>>>> [    4.394092] Invalid queue enabled by amdgpu: 9
>>>>>
>>>>> I suggest you get a Kaveri/Carrizo machine to debug these issues.
>>>>>
>>>>> Until that, I don't think we should merge this patch-set.
>>>>>
>>>>> Oded
>>>>>
>>>>> On Wed, Feb 8, 2017 at 9:47 PM, Andres Rodriguez
>>>>> <andresx7@gmail.com>
>>>>> wrote:
>>>>>>
>>>>>> Thank you Oded.
>>>>>>
>>>>>> - Andres
>>>>>>
>>>>>>
>>>>>> On 2017-02-08 02:32 PM, Oded Gabbay wrote:
>>>>>>>
>>>>>>> On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez
>>>>>>> <andresx7@gmail.com>
>>>>>>> wrote:
>>>>>>>>
>>>>>>>> Hey Felix,
>>>>>>>>
>>>>>>>> Thanks for the pointer to the ROCm mqd commit. I like that the
>>>>>>>> workarounds are easy to spot. I'll add that to a new patch
>>>>>>>> series I'm working on for some bug-fixes for perf being lower on
>>>>>>>> pipes other than pipe 0.
>>>>>>>>
>>>>>>>> I haven't tested this yet on kaveri/carrizo. I'm hoping someone
>>>>>>>> with the HW will be able to give it a go. I put in a few small
>>>>>>>> hacks to get KFD to boot but do nothing on polaris10.
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Andres
>>>>>>>>
>>>>>>>>
>>>>>>>> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
>>>>>>>>>
>>>>>>>>> Hi Andres,
>>>>>>>>>
>>>>>>>>> Thank you for tackling this task. It's more involved than I
>>>>>>>>> expected, mostly because I didn't have much awareness of the
>>>>>>>>> MQD management in amdgpu.
>>>>>>>>>
>>>>>>>>> I made one comment in a separate message about the unified MQD
>>>>>>>>> commit function, if you want to bring that more in line with
>>>>>>>>> our latest ROCm release on github.
>>>>>>>>>
>>>>>>>>> Also, were you able to test the upstream KFD with your changes
>>>>>>>>> on a Kaveri or Carrizo?
>>>>>>>>>
>>>>>>>>> Regards,
>>>>>>>>>      Felix
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>>>>>>>>>>
>>>>>>>>>> The current queue/pipe split policy is for amdgpu to take the
>>>>>>>>>> first pipe of
>>>>>>>>>> MEC0 and leave the rest for amdkfd to use. This policy is
>>>>>>>>>> taken as an assumption in a few areas of the implementation.
>>>>>>>>>>
>>>>>>>>>> This patch series aims to allow for flexible/tunable
>>>>>>>>>> queue/pipe split policies between kgd and kfd. It also updates
>>>>>>>>>> the queue/pipe split policy to one that allows better compute
>>>>>>>>>> app concurrency for both drivers.
>>>>>>>>>>
>>>>>>>>>> In the process some duplicate code and hardcoded constants
>>>>>>>>>> were removed.
>>>>>>>>>>
>>>>>>>>>> Any suggestions or feedback on improvements welcome.
>>>>>>>>>>
>>>>>>>> _______________________________________________
>>>>>>>> amd-gfx mailing list
>>>>>>>> amd-gfx@lists.freedesktop.org
>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>>
>>>>>>> Hi Andres,
>>>>>>> I will try to find some time to test it on my Kaveri machine.
>>>>>>>
>>>>>>> Oded
>>>>>>
>>>>>>
>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: Change queue/pipe split between amdkfd and amdgpu
       [not found]                                         ` <BN6PR12MB1348C4F31B81516D5EAAFCA3E85A0-/b2+HYfkarQX0pEhCR5T8QdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-02-16  7:14                                           ` Edward O'Callaghan
  0 siblings, 0 replies; 31+ messages in thread
From: Edward O'Callaghan @ 2017-02-16  7:14 UTC (permalink / raw)
  To: Bridgman, John, Oded Gabbay, Andres Rodriguez
  Cc: Deucher, Alexander, Jay Cornwall, Kuehling, Felix,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1.1: Type: text/plain, Size: 16749 bytes --]



On 02/16/2017 03:00 PM, Bridgman, John wrote:
> Any objections to authorizing Oded to post the kfdtest binary he is using to some public place (if not there already) so others (like Andres) can test changes which touch on amdkfd ? 
> 
> We should check it for embarrassing symbols but otherwise it should be OK.

someone was up late for a deadline? lol

> 
> That said, since we are getting perilously close to actually sending dGPU support changes upstream we will need (IMO) to maintain a sanitized source repo for kfdtest as well... sharing the binary just gets us started.
> 

Hi John,

Yes, this is the sort of thing I've been referring to for some time now.
We definitely need some kind of centralized mechanism to test/validate
kfd stuff so if you can get this out that would be great! A binary would
be a start, I am sure we can make do and it's certainly better than
nothing, however source much like what happened with UMR would be of
course ideal.

I suggest to you that it would perhaps be good if we could arrange some
kind of IRC meeting regarding kfd? Since it seems there is a bit of
fragmented effort here. I have my own ioctl()'s locally for pinning for
my own project which I am not sure are suitable to just upstream as AMD
has its own take so what should we do? I heard so much about dGPU
support for a couple of years now but only seen bits thrown over the
wall. Can we begin a more serious incremental approach happening ASAP?
I created #amdkfd on freenode some time ago which a couple of interested
academics and users hang.

Kind Regards,
Edward.

> Thanks,
> John
> 
>> -----Original Message-----
>> From: Oded Gabbay [mailto:oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org]
>> Sent: Friday, February 10, 2017 12:57 PM
>> To: Andres Rodriguez
>> Cc: Kuehling, Felix; Bridgman, John; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org;
>> Deucher, Alexander; Jay Cornwall
>> Subject: Re: Change queue/pipe split between amdkfd and amdgpu
>>
>> I don't have a repo, nor do I have the source code.
>> It is a tool that we developed inside AMD (when I was working there), and
>> after I left AMD I got permission to use the binary for regressions testing.
>>
>> Oded
>>
>> On Fri, Feb 10, 2017 at 6:33 PM, Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>> wrote:
>>> Hey Oded,
>>>
>>> Where can I find a repo with kfdtest?
>>>
>>> I tried looking here bit couldn't find it:
>>>
>>> https://cgit.freedesktop.org/~gabbayo/
>>>
>>> -Andres
>>>
>>>
>>>
>>> On 2017-02-10 05:35 AM, Oded Gabbay wrote:
>>>>
>>>> So the warning in dmesg is gone of course, but the test (that I
>>>> mentioned in previous email) still fails, and this time it caused the
>>>> kernel to crash. In addition, now other tests fail as well, e.g.
>>>> KFDEventTest.SignalEvent
>>>>
>>>> I honestly suggest to take some time to debug this patch-set on an
>>>> actual Kaveri machine and then re-send the patches.
>>>>
>>>> Thanks,
>>>> Oded
>>>>
>>>> log of crash from KFDQMTest.CreateMultipleCpQueues:
>>>>
>>>> [  160.900137] kfd: qcm fence wait loop timeout expired [
>>>> 160.900143] kfd: the cp might be in an unrecoverable state due to an
>>>> unsuccessful queues preemption [  160.916765] show_signal_msg: 36
>>>> callbacks suppressed [  160.916771] kfdtest[2498]: segfault at
>>>> 100007f8a ip 00007f8ae932ee5d sp 00007ffc52219cd0 error 4 in
>>>> libhsakmt-1.so.0.0.1[7f8ae932b000+8000]
>>>> [  163.152229] kfd: qcm fence wait loop timeout expired [
>>>> 163.152250] BUG: unable to handle kernel NULL pointer dereference at
>>>> 000000000000005a [  163.152299] IP:
>>>> kfd_get_process_device_data+0x6/0x30 [amdkfd] [  163.152323] PGD
>>>> 2333aa067 [  163.152323] PUD 230f64067 [  163.152335] PMD 0
>>>>
>>>> [  163.152364] Oops: 0000 [#1] SMP
>>>> [  163.152379] Modules linked in: joydev edac_mce_amd edac_core
>>>> input_leds kvm_amd snd_hda_codec_realtek kvm irqbypass
>>>> snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_intel
>> snd_hda_codec
>>>> crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_core
>>>> snd_hwdep pcbc snd_pcm aesni_intel snd_seq_midi snd_seq_midi_event
>>>> snd_rawmidi snd_seq aes_x86_64 crypto_simd snd_seq_device
>> glue_helper
>>>> cryptd snd_timer snd fam15h_power k10temp soundcore i2c_piix4 shpchp
>>>> tpm_infineon mac_hid parport_pc ppdev nfsd auth_rpcgss nfs_acl lockd
>>>> lp grace sunrpc parport autofs4 hid_logitech_hidpp hid_logitech_dj
>>>> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2 radeon
>>>> i2c_algo_bit ttm drm_kms_helper syscopyarea ahci sysfillrect
>>>> sysimgblt libahci fb_sys_fops drm r8169 mii fjes video [  163.152668]
>>>> CPU: 3 PID: 2498 Comm: kfdtest Not tainted 4.10.0-rc5+ #3 [
>>>> 163.152695] Hardware name: Gigabyte Technology Co., Ltd. To be filled
>>>> by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014 [  163.152735] task:
>>>> ffff995e73d16580 task.stack: ffffb41144458000 [  163.152764] RIP:
>>>> 0010:kfd_get_process_device_data+0x6/0x30 [amdkfd] [  163.152790]
>>>> RSP: 0018:ffffb4114445bab0 EFLAGS: 00010246 [  163.152812] RAX:
>>>> ffffffffffffffea RBX: ffff995e75909c00 RCX:
>>>> 0000000000000000
>>>> [  163.152841] RDX: 0000000000000000 RSI: ffffffffffffffea RDI:
>>>> ffff995e75909600
>>>> [  163.152869] RBP: ffffb4114445bae0 R08: 00000000000252a5 R09:
>>>> 0000000000000414
>>>> [  163.152898] R10: 0000000000000000 R11: ffffffffb412d38d R12:
>>>> 00000000ffffffc2
>>>> [  163.152926] R13: 0000000000000000 R14: ffff995e75909ca8 R15:
>>>> ffff995e75909c00
>>>> [  163.152956] FS:  00007f8ae975e740(0000) GS:ffff995e7ed80000(0000)
>>>> knlGS:0000000000000000
>>>> [  163.152988] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [
>>>> 163.153012] CR2: 000000000000005a CR3: 00000002216ab000 CR4:
>>>> 00000000000406e0
>>>> [  163.153041] Call Trace:
>>>> [  163.153059]  ? destroy_queues_cpsch+0x166/0x190 [amdkfd] [
>>>> 163.153086]  execute_queues_cpsch+0x2e/0xc0 [amdkfd] [  163.153113]
>>>> destroy_queue_cpsch+0xbd/0x140 [amdkfd] [  163.153139]
>>>> pqm_destroy_queue+0x111/0x1d0 [amdkfd] [  163.153164]
>>>> pqm_uninit+0x3f/0xb0 [amdkfd] [  163.153186]
>>>> kfd_unbind_process_from_device+0x51/0xd0 [amdkfd] [  163.153214]
>>>> iommu_pasid_shutdown_callback+0x20/0x30 [amdkfd] [  163.153239]
>>>> mn_release+0x37/0x70 [amd_iommu_v2] [  163.153261]
>>>> __mmu_notifier_release+0x44/0xc0 [  163.153281]
>>>> exit_mmap+0x15a/0x170 [  163.153297]  ? __wake_up+0x44/0x50 [
>>>> 163.153314]  ? exit_robust_list+0x5c/0x110 [  163.153333]
>>>> mmput+0x57/0x140 [  163.153347]  do_exit+0x26b/0xb30 [  163.153362]
>>>> do_group_exit+0x43/0xb0 [  163.153379]  get_signal+0x293/0x620 [
>>>> 163.153396]  do_signal+0x37/0x760 [  163.153411]  ?
>>>> print_vma_addr+0x82/0x100 [  163.153429]  ? vprintk_default+0x29/0x50
>>>> [  163.153447]  ? bad_area+0x46/0x50 [  163.153463]  ?
>>>> __do_page_fault+0x3c7/0x4e0 [  163.153481]
>>>> exit_to_usermode_loop+0x76/0xb0 [  163.153500]
>>>> prepare_exit_to_usermode+0x2f/0x40
>>>> [  163.153521]  retint_user+0x8/0x10
>>>> [  163.153536] RIP: 0033:0x7f8ae932ee5d [  163.153551] RSP:
>>>> 002b:00007ffc52219cd0 EFLAGS: 00010202 [  163.153573] RAX:
>>>> 0000000000000003 RBX: 0000000100007f8a RCX:
>>>> 00007ffc52219d00
>>>> [  163.153602] RDX: 00007f8ae9534220 RSI: 00007f8ae8b5eb28 RDI:
>>>> 0000000100007f8a
>>>> [  163.153630] RBP: 00007ffc52219d20 R08: 0000000001cc1890 R09:
>>>> 0000000000000000
>>>> [  163.153659] R10: 0000000000000027 R11: 00007f8ae932ee10 R12:
>>>> 0000000001cc52a0
>>>> [  163.153687] R13: 00007ffc5221a200 R14: 0000000000000021 R15:
>>>> 0000000000000000
>>>> [  163.153716] Code: e0 04 00 00 48 3b 91 f0 03 00 00 74 01 c3 55 48
>>>> 89 e5 e8 2e f9 ff ff 5d c3 66 90 66 2e 0f 1f 84 00 00 00 00 00 0f 1f
>>>> 44 00 00 55 <48> 8b 46 70 48 83 c6 70 48 89 e5 48 39 f0 74 16 48 3b
>>>> 78
>>>> 10 75
>>>> [  163.153818] RIP: kfd_get_process_device_data+0x6/0x30 [amdkfd] RSP:
>>>> ffffb4114445bab0
>>>> [  163.153848] CR2: 000000000000005a
>>>> [  163.160389] ---[ end trace f6a8177c7119c1f5 ]--- [  163.160390]
>>>> Fixing recursive fault but reboot is needed!
>>>>
>>>> On Thu, Feb 9, 2017 at 10:38 PM, Andres Rodriguez
>>>> <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>> wrote:
>>>>>
>>>>> Hey Oded,
>>>>>
>>>>> Sorry to be a nuisance, but if you have everything still setup could
>>>>> you give this fix a quick go?
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>>>> index 5321d18..9f70ee0 100644
>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>>>> @@ -667,7 +667,7 @@ static int set_sched_resources(struct
>>>>> device_queue_manager *dqm)
>>>>>                  /* This situation may be hit in the future if a new HW
>>>>>                   * generation exposes more than 64 queues. If so, the
>>>>>                   * definition of res.queue_mask needs updating */
>>>>> -               if (WARN_ON(i > sizeof(res.queue_mask))) {
>>>>> +               if (WARN_ON(i > (sizeof(res.queue_mask)*8))) {
>>>>>                          pr_err("Invalid queue enabled by amdgpu:
>>>>> %d\n", i);
>>>>>                          break;
>>>>>                  }
>>>>>
>>>>> John/Felix,
>>>>>
>>>>> Any chance I could borrow a carrizo/kaveri for a few days? Or maybe
>>>>> you could help me run some final tests on this patch series?
>>>>>
>>>>> - Andres
>>>>>
>>>>>
>>>>>
>>>>> On 2017-02-09 03:11 PM, Oded Gabbay wrote:
>>>>>>
>>>>>>    Andres,
>>>>>>
>>>>>> I tried your patches on Kaveri with airlied's drm-next branch.
>>>>>> I used radeon+amdkfd
>>>>>>
>>>>>> The following test failed: KFDQMTest.CreateMultipleCpQueues
>>>>>> However, I can't debug it because I don't have the sources of kfdtest.
>>>>>>
>>>>>> In dmesg, I saw the following warning during boot:
>>>>>> WARNING: CPU: 0 PID: 150 at
>>>>>> drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c:670
>>>>>> start_cpsch+0xc5/0x220 [amdkfd]
>>>>>> [    4.393796] Modules linked in: hid_logitech_hidpp hid_logitech_dj
>>>>>> hid_generic usbhid hid uas usb_storage amdkfd amd_iommu_v2
>>>>>> radeon(+) i2c_algo_bit ttm drm_kms_helper syscopyarea ahci
>>>>>> sysfillrect sysimgblt libahci fb_sys_fops drm r8169 mii fjes video
>>>>>> [    4.393811] CPU: 0 PID: 150 Comm: systemd-udevd Not tainted
>>>>>> 4.10.0-rc5+
>>>>>> #1
>>>>>> [    4.393811] Hardware name: Gigabyte Technology Co., Ltd. To be
>>>>>> filled by O.E.M./F2A88XM-D3H, BIOS F5 01/09/2014
>>>>>> [    4.393812] Call Trace:
>>>>>> [    4.393818]  dump_stack+0x63/0x90
>>>>>> [    4.393822]  __warn+0xcb/0xf0
>>>>>> [    4.393823]  warn_slowpath_null+0x1d/0x20
>>>>>> [    4.393830]  start_cpsch+0xc5/0x220 [amdkfd]
>>>>>> [    4.393836]  ? initialize_cpsch+0xa0/0xb0 [amdkfd]
>>>>>> [    4.393841]  kgd2kfd_device_init+0x375/0x490 [amdkfd]
>>>>>> [    4.393883]  radeon_kfd_device_init+0xaf/0xd0 [radeon]
>>>>>> [    4.393911]  radeon_driver_load_kms+0x11e/0x1f0 [radeon]
>>>>>> [    4.393933]  drm_dev_register+0x14a/0x200 [drm]
>>>>>> [    4.393946]  drm_get_pci_dev+0x9d/0x160 [drm]
>>>>>> [    4.393974]  radeon_pci_probe+0xb8/0xe0 [radeon]
>>>>>> [    4.393976]  local_pci_probe+0x45/0xa0
>>>>>> [    4.393978]  pci_device_probe+0x103/0x150
>>>>>> [    4.393981]  driver_probe_device+0x2bf/0x460
>>>>>> [    4.393982]  __driver_attach+0xdf/0xf0
>>>>>> [    4.393984]  ? driver_probe_device+0x460/0x460
>>>>>> [    4.393985]  bus_for_each_dev+0x6c/0xc0
>>>>>> [    4.393987]  driver_attach+0x1e/0x20
>>>>>> [    4.393988]  bus_add_driver+0x1fd/0x270
>>>>>> [    4.393989]  ? 0xffffffffc05c8000
>>>>>> [    4.393991]  driver_register+0x60/0xe0
>>>>>> [    4.393992]  ? 0xffffffffc05c8000
>>>>>> [    4.393993]  __pci_register_driver+0x4c/0x50
>>>>>> [    4.394007]  drm_pci_init+0xeb/0x100 [drm]
>>>>>> [    4.394008]  ? 0xffffffffc05c8000
>>>>>> [    4.394031]  radeon_init+0x98/0xb6 [radeon]
>>>>>> [    4.394034]  do_one_initcall+0x53/0x1a0
>>>>>> [    4.394037]  ? __vunmap+0x81/0xd0
>>>>>> [    4.394039]  ? kmem_cache_alloc_trace+0x152/0x1c0
>>>>>> [    4.394041]  ? vfree+0x2e/0x70
>>>>>> [    4.394044]  do_init_module+0x5f/0x1ff
>>>>>> [    4.394046]  load_module+0x24cc/0x29f0
>>>>>> [    4.394047]  ? __symbol_put+0x60/0x60
>>>>>> [    4.394050]  ? security_kernel_post_read_file+0x6b/0x80
>>>>>> [    4.394052]  SYSC_finit_module+0xdf/0x110
>>>>>> [    4.394054]  SyS_finit_module+0xe/0x10
>>>>>> [    4.394056]  entry_SYSCALL_64_fastpath+0x1e/0xad
>>>>>> [    4.394058] RIP: 0033:0x7f9cda77c8e9
>>>>>> [    4.394059] RSP: 002b:00007ffe195d3378 EFLAGS: 00000246 ORIG_RAX:
>>>>>> 0000000000000139
>>>>>> [    4.394060] RAX: ffffffffffffffda RBX: 00007f9cdb8dda7e RCX:
>>>>>> 00007f9cda77c8e9
>>>>>> [    4.394061] RDX: 0000000000000000 RSI: 00007f9cdac7ce2a RDI:
>>>>>> 0000000000000013
>>>>>> [    4.394062] RBP: 00007ffe195d2450 R08: 0000000000000000 R09:
>>>>>> 0000000000000000
>>>>>> [    4.394063] R10: 0000000000000013 R11: 0000000000000246 R12:
>>>>>> 00007ffe195d245a
>>>>>> [    4.394063] R13: 00007ffe195d1378 R14: 0000563f70cc93b0 R15:
>>>>>> 0000563f70cba4d0
>>>>>> [    4.394091] ---[ end trace 9c5af17304d998bb ]---
>>>>>> [    4.394092] Invalid queue enabled by amdgpu: 9
>>>>>>
>>>>>> I suggest you get a Kaveri/Carrizo machine to debug these issues.
>>>>>>
>>>>>> Until that, I don't think we should merge this patch-set.
>>>>>>
>>>>>> Oded
>>>>>>
>>>>>> On Wed, Feb 8, 2017 at 9:47 PM, Andres Rodriguez
>>>>>> <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>>> wrote:
>>>>>>>
>>>>>>> Thank you Oded.
>>>>>>>
>>>>>>> - Andres
>>>>>>>
>>>>>>>
>>>>>>> On 2017-02-08 02:32 PM, Oded Gabbay wrote:
>>>>>>>>
>>>>>>>> On Wed, Feb 8, 2017 at 6:23 PM, Andres Rodriguez
>>>>>>>> <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>>>>>>> wrote:
>>>>>>>>>
>>>>>>>>> Hey Felix,
>>>>>>>>>
>>>>>>>>> Thanks for the pointer to the ROCm mqd commit. I like that the
>>>>>>>>> workarounds are easy to spot. I'll add that to a new patch
>>>>>>>>> series I'm working on for some bug-fixes for perf being lower on
>>>>>>>>> pipes other than pipe 0.
>>>>>>>>>
>>>>>>>>> I haven't tested this yet on kaveri/carrizo. I'm hoping someone
>>>>>>>>> with the HW will be able to give it a go. I put in a few small
>>>>>>>>> hacks to get KFD to boot but do nothing on polaris10.
>>>>>>>>>
>>>>>>>>> Regards,
>>>>>>>>> Andres
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 2017-02-06 03:20 PM, Felix Kuehling wrote:
>>>>>>>>>>
>>>>>>>>>> Hi Andres,
>>>>>>>>>>
>>>>>>>>>> Thank you for tackling this task. It's more involved than I
>>>>>>>>>> expected, mostly because I didn't have much awareness of the
>>>>>>>>>> MQD management in amdgpu.
>>>>>>>>>>
>>>>>>>>>> I made one comment in a separate message about the unified MQD
>>>>>>>>>> commit function, if you want to bring that more in line with
>>>>>>>>>> our latest ROCm release on github.
>>>>>>>>>>
>>>>>>>>>> Also, were you able to test the upstream KFD with your changes
>>>>>>>>>> on a Kaveri or Carrizo?
>>>>>>>>>>
>>>>>>>>>> Regards,
>>>>>>>>>>      Felix
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 17-02-03 11:51 PM, Andres Rodriguez wrote:
>>>>>>>>>>>
>>>>>>>>>>> The current queue/pipe split policy is for amdgpu to take the
>>>>>>>>>>> first pipe of
>>>>>>>>>>> MEC0 and leave the rest for amdkfd to use. This policy is
>>>>>>>>>>> taken as an assumption in a few areas of the implementation.
>>>>>>>>>>>
>>>>>>>>>>> This patch series aims to allow for flexible/tunable
>>>>>>>>>>> queue/pipe split policies between kgd and kfd. It also updates
>>>>>>>>>>> the queue/pipe split policy to one that allows better compute
>>>>>>>>>>> app concurrency for both drivers.
>>>>>>>>>>>
>>>>>>>>>>> In the process some duplicate code and hardcoded constants
>>>>>>>>>>> were removed.
>>>>>>>>>>>
>>>>>>>>>>> Any suggestions or feedback on improvements welcome.
>>>>>>>>>>>
>>>>>>>>> _______________________________________________
>>>>>>>>> amd-gfx mailing list
>>>>>>>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>>>>>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>>>>>>>
>>>>>>>> Hi Andres,
>>>>>>>> I will try to find some time to test it on my Kaveri machine.
>>>>>>>>
>>>>>>>> Oded
>>>>>>>
>>>>>>>
>>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 


[-- Attachment #1.2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2017-02-16  7:14 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-04  4:51 Change queue/pipe split between amdkfd and amdgpu Andres Rodriguez
     [not found] ` <20170204045142.5596-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-02-04  4:51   ` [PATCH 01/13] drm/amdgpu: refactor MQD/HQD initialization Andres Rodriguez
2017-02-04  4:51   ` [PATCH 02/13] drm/amdgpu: doorbell registers need only be set once Andres Rodriguez
     [not found]     ` <20170204045142.5596-3-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-02-06  8:31       ` Christian König
     [not found]         ` <7d85c562-5227-1fef-7b99-1f7543e6e69b-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-02-08 16:25           ` Andres Rodriguez
2017-02-04  4:51   ` [PATCH 03/13] drm/amdgpu: detect timeout error when deactivating hqd Andres Rodriguez
2017-02-04  4:51   ` [PATCH 04/13] drm/amdgpu: remove duplicate definition of cik_mqd Andres Rodriguez
2017-02-04  4:51   ` [PATCH 05/13] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu Andres Rodriguez
     [not found]     ` <20170204045142.5596-6-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-02-06 20:16       ` Felix Kuehling
2017-02-04  4:51   ` [PATCH 06/13] drm/amdgpu: rename rdev to adev Andres Rodriguez
2017-02-04  4:51   ` [PATCH 07/13] drm/amdgpu: take ownership of per-pipe configuration Andres Rodriguez
2017-02-04  4:51   ` [PATCH 08/13] drm/radeon: take ownership of pipe initialization Andres Rodriguez
2017-02-04  4:51   ` [PATCH 09/13] drm/amdgpu: allow split of queues with kfd at queue granularity Andres Rodriguez
2017-02-04  4:51   ` [PATCH 10/13] drm/amdkfd: allow split HQD split on per-queue granularity Andres Rodriguez
2017-02-04  4:51   ` [PATCH 11/13] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c Andres Rodriguez
2017-02-04  4:51   ` [PATCH 12/13] drm/amdgpu: allocate queues horizontally across pipes Andres Rodriguez
2017-02-04  4:51   ` [PATCH 13/13] drm/amdgpu: new queue policy, take first 2 queues of each pipe Andres Rodriguez
     [not found]     ` <20170204045142.5596-14-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-02-04 12:08       ` Edward O'Callaghan
     [not found]         ` <86138a88-e90f-3234-7109-67ca0c427071-dczkZgxz+BNUPWh3PAxdjQ@public.gmane.org>
2017-02-06  8:35           ` Christian König
2017-02-06 20:20   ` Change queue/pipe split between amdkfd and amdgpu Felix Kuehling
     [not found]     ` <206b40fe-b958-8a78-623b-011712dc5ecc-5C7GfCeVMHo@public.gmane.org>
2017-02-08 16:23       ` Andres Rodriguez
     [not found]         ` <852b8cfe-d886-e78e-de6c-1641b107ed8f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-02-08 19:32           ` Oded Gabbay
     [not found]             ` <CAFCwf10BboWSwU9HAMjryuLw2K2ANpjC8hGgGwkJh6z8K3pR4Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-02-08 19:47               ` Andres Rodriguez
     [not found]                 ` <50aecc56-f080-d343-9e49-e3955ba1212e-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-02-09 20:11                   ` Oded Gabbay
     [not found]                     ` <CAFCwf10L8=mwxCup6-S5Yirxit8MJEZR=rhLPF3NVnotGCSYiQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-02-09 20:19                       ` Andres Rodriguez
2017-02-09 20:38                       ` Andres Rodriguez
     [not found]                         ` <8e51b688-d978-d40f-8aa3-ae1090ab6a03-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-02-10 10:35                           ` Oded Gabbay
     [not found]                             ` <CAFCwf10agJ+C0X-jvPa5jSJcR--+u3-TC6LuVQwW1+o94uGonA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-02-10 16:33                               ` Andres Rodriguez
     [not found]                                 ` <37bd1bf7-4db6-6004-f61d-5084efd1ec83-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-02-10 17:56                                   ` Oded Gabbay
     [not found]                                     ` <CAFCwf125SHM52z2UAp_Y0rRQe9UHpeWMDDpBkf1csPJyAgXTeA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-02-16  4:00                                       ` Bridgman, John
     [not found]                                         ` <BN6PR12MB1348C4F31B81516D5EAAFCA3E85A0-/b2+HYfkarQX0pEhCR5T8QdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-02-16  7:14                                           ` Edward O'Callaghan

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.