All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/8] drm/amdgpu: fix incorrect use of amdgpu_irq_add_id in si_dma.c
@ 2018-10-08 13:35 Christian König
       [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-08 13:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Adding a second irq source because of a different src_id is actually a
bug.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  4 ----
 drivers/gpu/drm/amd/amdgpu/si_dma.c      | 27 ++++++++-------------------
 2 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index d17503f0df8e..500113ec65ca 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -46,10 +46,6 @@ struct amdgpu_sdma_instance {
 
 struct amdgpu_sdma {
 	struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
-#ifdef CONFIG_DRM_AMDGPU_SI
-	//SI DMA has a difference trap irq number for the second engine
-	struct amdgpu_irq_src	trap_irq_1;
-#endif
 	struct amdgpu_irq_src	trap_irq;
 	struct amdgpu_irq_src	illegal_inst_irq;
 	int			num_instances;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index d4ceaf440f26..adbaea6da0d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -502,12 +502,14 @@ static int si_dma_sw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* DMA0 trap event */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
+			      &adev->sdma.trap_irq);
 	if (r)
 		return r;
 
 	/* DMA1 trap event */
-	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244, &adev->sdma.trap_irq_1);
+	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
+			      &adev->sdma.trap_irq);
 	if (r)
 		return r;
 
@@ -649,17 +651,10 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
-	amdgpu_fence_process(&adev->sdma.instance[0].ring);
-
-	return 0;
-}
-
-static int si_dma_process_trap_irq_1(struct amdgpu_device *adev,
-				      struct amdgpu_irq_src *source,
-				      struct amdgpu_iv_entry *entry)
-{
-	amdgpu_fence_process(&adev->sdma.instance[1].ring);
-
+	if (entry->src_id == 224)
+		amdgpu_fence_process(&adev->sdma.instance[0].ring);
+	else
+		amdgpu_fence_process(&adev->sdma.instance[1].ring);
 	return 0;
 }
 
@@ -786,11 +781,6 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
 	.process = si_dma_process_trap_irq,
 };
 
-static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs_1 = {
-	.set = si_dma_set_trap_irq_state,
-	.process = si_dma_process_trap_irq_1,
-};
-
 static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
 	.process = si_dma_process_illegal_inst_irq,
 };
@@ -799,7 +789,6 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
 {
 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
 	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
-	adev->sdma.trap_irq_1.funcs = &si_dma_trap_irq_funcs_1;
 	adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
 }
 
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-08 13:35   ` Christian König
       [not found]     ` <20181008133521.3237-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-10-08 13:35   ` [PATCH 3/8] drm/amdgpu: add basics for SDMA page queue support Christian König
                     ` (6 subsequent siblings)
  7 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-08 13:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Under SRIOV we were enabling the ring buffer before it was initialized.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 234 ++++++++++++++++-----------------
 1 file changed, 116 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index c20d413f277c..5ecf6c9252c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -673,13 +673,14 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
  * sdma_v4_0_gfx_resume - setup and start the async dma engines
  *
  * @adev: amdgpu_device pointer
+ * @i: instance to resume
  *
  * Set up the gfx DMA ring buffers and enable them (VEGA10).
  * Returns 0 for success, error for failure.
  */
-static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
+static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 {
-	struct amdgpu_ring *ring;
+	struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
 	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
 	u32 rb_bufsz;
 	u32 wb_offset;
@@ -687,129 +688,108 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 	u32 doorbell_offset;
 	u32 temp;
 	u64 wptr_gpu_addr;
-	int i, r;
 
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		ring = &adev->sdma.instance[i].ring;
-		wb_offset = (ring->rptr_offs * 4);
+	wb_offset = (ring->rptr_offs * 4);
 
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
 
-		/* Set ring buffer size in dwords */
-		rb_bufsz = order_base_2(ring->ring_size / 4);
-		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
-		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+	/* Set ring buffer size in dwords */
+	rb_bufsz = order_base_2(ring->ring_size / 4);
+	rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
 #ifdef __BIG_ENDIAN
-		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
-		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
-					RPTR_WRITEBACK_SWAP_ENABLE, 1);
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+				RPTR_WRITEBACK_SWAP_ENABLE, 1);
 #endif
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
 
-		/* Initialize the ring buffer's read and write pointers */
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
 
-		/* set the wb address whether it's enabled or not */
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
-		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
-		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+	/* set the wb address whether it's enabled or not */
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
 
-		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
 
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
 
-		ring->wptr = 0;
+	ring->wptr = 0;
 
-		/* before programing wptr to a less value, need set minor_ptr_update first */
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+	/* before programing wptr to a less value, need set minor_ptr_update first */
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
 
-		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
-			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
-		}
+	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+	}
 
-		doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
-		doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+	doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+	doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
 
-		if (ring->use_doorbell) {
-			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
-			doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
-					OFFSET, ring->doorbell_index);
-		} else {
-			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
-		}
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
-		adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
-						      ring->doorbell_index);
-
-		if (amdgpu_sriov_vf(adev))
-			sdma_v4_0_ring_set_wptr(ring);
-
-		/* set minor_ptr_update to 0 after wptr programed */
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
-
-		/* set utc l1 enable flag always to 1 */
-		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
-		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
-		if (!amdgpu_sriov_vf(adev)) {
-			/* unhalt engine */
-			temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
-			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
-			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
-		}
+	if (ring->use_doorbell) {
+		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+				OFFSET, ring->doorbell_index);
+	} else {
+		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+	}
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+	adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+					      ring->doorbell_index);
 
-		/* setup the wptr shadow polling */
-		wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
-		       lower_32_bits(wptr_gpu_addr));
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
-		       upper_32_bits(wptr_gpu_addr));
-		wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
-		if (amdgpu_sriov_vf(adev))
-			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
-		else
-			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
+	if (amdgpu_sriov_vf(adev))
+		sdma_v4_0_ring_set_wptr(ring);
 
-		/* enable DMA RB */
-		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+	/* set minor_ptr_update to 0 after wptr programed */
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
 
-		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
-		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
-#ifdef __BIG_ENDIAN
-		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
-#endif
-		/* enable DMA IBs */
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+	/* set utc l1 enable flag always to 1 */
+	temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+	temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
 
-		ring->ready = true;
+	if (!amdgpu_sriov_vf(adev)) {
+		/* unhalt engine */
+		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+		temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+	}
 
-		if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
-			sdma_v4_0_ctx_switch_enable(adev, true);
-			sdma_v4_0_enable(adev, true);
-		}
+	/* setup the wptr shadow polling */
+	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+	       lower_32_bits(wptr_gpu_addr));
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+	       upper_32_bits(wptr_gpu_addr));
+	wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+	if (amdgpu_sriov_vf(adev))
+		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
+	else
+		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
 
-		r = amdgpu_ring_test_ring(ring);
-		if (r) {
-			ring->ready = false;
-			return r;
-		}
+	/* enable DMA RB */
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
 
-		if (adev->mman.buffer_funcs_ring == ring)
-			amdgpu_ttm_set_buffer_funcs_status(adev, true);
-
-	}
+	ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+	/* enable DMA IBs */
+	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
 
-	return 0;
+	ring->ready = true;
 }
 
 static void
@@ -941,33 +921,51 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
  */
 static int sdma_v4_0_start(struct amdgpu_device *adev)
 {
-	int r = 0;
+	struct amdgpu_ring *ring;
+	int i, r;
 
 	if (amdgpu_sriov_vf(adev)) {
 		sdma_v4_0_ctx_switch_enable(adev, false);
 		sdma_v4_0_enable(adev, false);
+	} else {
 
-		/* set RB registers */
-		r = sdma_v4_0_gfx_resume(adev);
-		return r;
+		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+			r = sdma_v4_0_load_microcode(adev);
+			if (r)
+				return r;
+		}
+
+		/* unhalt the MEs */
+		sdma_v4_0_enable(adev, true);
+		/* enable sdma ring preemption */
+		sdma_v4_0_ctx_switch_enable(adev, true);
 	}
 
-	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
-		r = sdma_v4_0_load_microcode(adev);
+	/* start the gfx rings and rlc compute queues */
+	for (i = 0; i < adev->sdma.num_instances; i++)
+		sdma_v4_0_gfx_resume(adev, i);
+
+	if (amdgpu_sriov_vf(adev)) {
+		sdma_v4_0_ctx_switch_enable(adev, true);
+		sdma_v4_0_enable(adev, true);
+	} else {
+		r = sdma_v4_0_rlc_resume(adev);
 		if (r)
 			return r;
 	}
 
-	/* unhalt the MEs */
-	sdma_v4_0_enable(adev, true);
-	/* enable sdma ring preemption */
-	sdma_v4_0_ctx_switch_enable(adev, true);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 
-	/* start the gfx rings and rlc compute queues */
-	r = sdma_v4_0_gfx_resume(adev);
-	if (r)
-		return r;
-	r = sdma_v4_0_rlc_resume(adev);
+		r = amdgpu_ring_test_ring(ring);
+		if (r) {
+			ring->ready = false;
+			return r;
+		}
+
+		if (adev->mman.buffer_funcs_ring == ring)
+			amdgpu_ttm_set_buffer_funcs_status(adev, true);
+	}
 
 	return r;
 }
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 3/8] drm/amdgpu: add basics for SDMA page queue support
       [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-10-08 13:35   ` [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV Christian König
@ 2018-10-08 13:35   ` Christian König
       [not found]     ` <20181008133521.3237-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-10-08 13:35   ` [PATCH 4/8] drm/amdgpu: remove non gfx specific handling from sdma_v4_0_gfx_resume Christian König
                     ` (5 subsequent siblings)
  7 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-08 13:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Just the common helper and a new ring in the SDMA instance.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 10 ++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index bc9244b429ef..0fb9907494bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -34,11 +34,9 @@ struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 	int i;
 
 	for (i = 0; i < adev->sdma.num_instances; i++)
-		if (&adev->sdma.instance[i].ring == ring)
-			break;
+		if (ring == &adev->sdma.instance[i].ring ||
+		    ring == &adev->sdma.instance[i].page)
+			return &adev->sdma.instance[i];
 
-	if (i < AMDGPU_MAX_SDMA_INSTANCES)
-		return &adev->sdma.instance[i];
-	else
-		return NULL;
+	return NULL;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 500113ec65ca..556db42edaed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -41,6 +41,7 @@ struct amdgpu_sdma_instance {
 	uint32_t		feature_version;
 
 	struct amdgpu_ring	ring;
+	struct amdgpu_ring	page;
 	bool			burst_nop;
 };
 
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 4/8] drm/amdgpu: remove non gfx specific handling from sdma_v4_0_gfx_resume
       [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-10-08 13:35   ` [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV Christian König
  2018-10-08 13:35   ` [PATCH 3/8] drm/amdgpu: add basics for SDMA page queue support Christian König
@ 2018-10-08 13:35   ` Christian König
       [not found]     ` <20181008133521.3237-4-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-10-08 13:35   ` [PATCH 5/8] drm/amdgpu: remove SRIOV " Christian König
                     ` (4 subsequent siblings)
  7 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-08 13:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Needed to start using the paging queue.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 36 +++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 5ecf6c9252c4..1124b45d166d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -686,13 +686,10 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	u32 wb_offset;
 	u32 doorbell;
 	u32 doorbell_offset;
-	u32 temp;
 	u64 wptr_gpu_addr;
 
 	wb_offset = (ring->rptr_offs * 4);
 
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
-
 	/* Set ring buffer size in dwords */
 	rb_bufsz = order_base_2(ring->ring_size / 4);
 	rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
@@ -752,18 +749,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	/* set minor_ptr_update to 0 after wptr programed */
 	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
 
-	/* set utc l1 enable flag always to 1 */
-	temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
-	temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
-	if (!amdgpu_sriov_vf(adev)) {
-		/* unhalt engine */
-		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
-		temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
-	}
-
 	/* setup the wptr shadow polling */
 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
 	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
@@ -942,9 +927,28 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
 	}
 
 	/* start the gfx rings and rlc compute queues */
-	for (i = 0; i < adev->sdma.num_instances; i++)
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		uint32_t temp;
+
+		WREG32(sdma_v4_0_get_reg_offset(adev, i,
+				mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
 		sdma_v4_0_gfx_resume(adev, i);
 
+		/* set utc l1 enable flag always to 1 */
+		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+
+		if (!amdgpu_sriov_vf(adev)) {
+			/* unhalt engine */
+			temp = RREG32(sdma_v4_0_get_reg_offset(adev, i,
+					mmSDMA0_F32_CNTL));
+			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+			WREG32(sdma_v4_0_get_reg_offset(adev, i,
+					mmSDMA0_F32_CNTL), temp);
+		}
+	}
+
 	if (amdgpu_sriov_vf(adev)) {
 		sdma_v4_0_ctx_switch_enable(adev, true);
 		sdma_v4_0_enable(adev, true);
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 5/8] drm/amdgpu: remove SRIOV specific handling from sdma_v4_0_gfx_resume
       [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2018-10-08 13:35   ` [PATCH 4/8] drm/amdgpu: remove non gfx specific handling from sdma_v4_0_gfx_resume Christian König
@ 2018-10-08 13:35   ` Christian König
       [not found]     ` <20181008133521.3237-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-10-08 13:35   ` [PATCH 6/8] drm/amdgpu: add some [WR]REG32_SDMA macros to sdma_v4_0.c Christian König
                     ` (3 subsequent siblings)
  7 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-08 13:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Just use the same code path for both SRIOV and bare metal.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 1124b45d166d..61da9b862ede 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -723,11 +723,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	/* before programing wptr to a less value, need set minor_ptr_update first */
 	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
 
-	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
-	}
-
 	doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
 	doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
 
@@ -743,8 +738,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
 					      ring->doorbell_index);
 
-	if (amdgpu_sriov_vf(adev))
-		sdma_v4_0_ring_set_wptr(ring);
+	sdma_v4_0_ring_set_wptr(ring);
 
 	/* set minor_ptr_update to 0 after wptr programed */
 	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 6/8] drm/amdgpu: add some [WR]REG32_SDMA macros to sdma_v4_0.c
       [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2018-10-08 13:35   ` [PATCH 5/8] drm/amdgpu: remove SRIOV " Christian König
@ 2018-10-08 13:35   ` Christian König
       [not found]     ` <20181008133521.3237-6-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-10-08 13:35   ` [PATCH 7/8] drm/amdgpu: activate paging queue on SDMA v4 Christian König
                     ` (2 subsequent siblings)
  7 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-08 13:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Significantly shortens the code.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 126 ++++++++++++++++-----------------
 1 file changed, 63 insertions(+), 63 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 61da9b862ede..55384bad7a70 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -54,6 +54,11 @@ MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
 #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L
 #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
 
+#define WREG32_SDMA(instance, offset, value) \
+	WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value)
+#define RREG32_SDMA(instance, offset) \
+	RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)))
+
 static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
 static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
@@ -367,8 +372,8 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
 	} else {
 		u32 lowbit, highbit;
 
-		lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
-		highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
+		lowbit = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR) >> 2;
+		highbit = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI) >> 2;
 
 		DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
 				ring->me, highbit, lowbit);
@@ -415,8 +420,10 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
 				lower_32_bits(ring->wptr << 2),
 				ring->me,
 				upper_32_bits(ring->wptr << 2));
-		WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
-		WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
+		WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR,
+			    lower_32_bits(ring->wptr << 2));
+		WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI,
+			    upper_32_bits(ring->wptr << 2));
 	}
 }
 
@@ -566,12 +573,12 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
 			amdgpu_ttm_set_buffer_funcs_status(adev, false);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+		rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
-		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+		WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
+		ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+		WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
 	}
 
 	sdma0->ready = false;
@@ -628,18 +635,15 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 	}
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+		f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL);
 		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
 		if (enable && amdgpu_sdma_phase_quantum) {
-			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
-			       phase_quantum);
-			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
-			       phase_quantum);
-			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
-			       phase_quantum);
+			WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum);
+			WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum);
+			WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
 		}
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+		WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
 	}
 
 }
@@ -663,9 +667,9 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
 	}
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+		f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
 		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
+		WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl);
 	}
 }
 
@@ -692,39 +696,39 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 
 	/* Set ring buffer size in dwords */
 	rb_bufsz = order_base_2(ring->ring_size / 4);
-	rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+	rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
 #ifdef __BIG_ENDIAN
 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
 				RPTR_WRITEBACK_SWAP_ENABLE, 1);
 #endif
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
 
 	/* Initialize the ring buffer's read and write pointers */
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0);
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0);
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0);
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0);
 
 	/* set the wb address whether it's enabled or not */
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
 	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
 	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
 
 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
 
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
 
 	ring->wptr = 0;
 
 	/* before programing wptr to a less value, need set minor_ptr_update first */
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+	WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1);
 
-	doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
-	doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+	doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
+	doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
 
 	if (ring->use_doorbell) {
 		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
@@ -733,40 +737,40 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	} else {
 		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
 	}
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
+	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
 	adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
 					      ring->doorbell_index);
 
 	sdma_v4_0_ring_set_wptr(ring);
 
 	/* set minor_ptr_update to 0 after wptr programed */
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+	WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
 
 	/* setup the wptr shadow polling */
 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
-	       lower_32_bits(wptr_gpu_addr));
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
-	       upper_32_bits(wptr_gpu_addr));
-	wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
+		    lower_32_bits(wptr_gpu_addr));
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
+		    upper_32_bits(wptr_gpu_addr));
+	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
 	if (amdgpu_sriov_vf(adev))
 		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
 	else
 		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
 
 	/* enable DMA RB */
 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
 
-	ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+	ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
 	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
 #ifdef __BIG_ENDIAN
 	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
 #endif
 	/* enable DMA IBs */
-	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+	WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
 
 	ring->ready = true;
 }
@@ -879,12 +883,14 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
 			(adev->sdma.instance[i].fw->data +
 				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
+		WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0);
 
 		for (j = 0; j < fw_size; j++)
-			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
+			WREG32_SDMA(i, mmSDMA0_UCODE_DATA,
+				    le32_to_cpup(fw_data++));
 
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
+		WREG32_SDMA(i, mmSDMA0_UCODE_ADDR,
+			    adev->sdma.instance[i].fw_version);
 	}
 
 	return 0;
@@ -924,22 +930,19 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		uint32_t temp;
 
-		WREG32(sdma_v4_0_get_reg_offset(adev, i,
-				mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+		WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
 		sdma_v4_0_gfx_resume(adev, i);
 
 		/* set utc l1 enable flag always to 1 */
-		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+		temp = RREG32_SDMA(i, mmSDMA0_CNTL);
 		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
+		WREG32_SDMA(i, mmSDMA0_CNTL, temp);
 
 		if (!amdgpu_sriov_vf(adev)) {
 			/* unhalt engine */
-			temp = RREG32(sdma_v4_0_get_reg_offset(adev, i,
-					mmSDMA0_F32_CNTL));
+			temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
 			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
-			WREG32(sdma_v4_0_get_reg_offset(adev, i,
-					mmSDMA0_F32_CNTL), temp);
+			WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp);
 		}
 	}
 
@@ -1406,7 +1409,7 @@ static bool sdma_v4_0_is_idle(void *handle)
 	u32 i;
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
+		u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG);
 
 		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
 			return false;
@@ -1422,8 +1425,8 @@ static int sdma_v4_0_wait_for_idle(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	for (i = 0; i < adev->usec_timeout; i++) {
-		sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
-		sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
+		sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG);
+		sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG);
 
 		if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
 			return 0;
@@ -1444,16 +1447,13 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
 					unsigned type,
 					enum amdgpu_interrupt_state state)
 {
+	unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1;
 	u32 sdma_cntl;
 
-	u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
-		sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
-		sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
-
-	sdma_cntl = RREG32(reg_offset);
+	sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL);
 	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
 		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
-	WREG32(reg_offset, sdma_cntl);
+	WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl);
 
 	return 0;
 }
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 7/8] drm/amdgpu: activate paging queue on SDMA v4
       [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2018-10-08 13:35   ` [PATCH 6/8] drm/amdgpu: add some [WR]REG32_SDMA macros to sdma_v4_0.c Christian König
@ 2018-10-08 13:35   ` Christian König
       [not found]     ` <20181008133521.3237-7-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-10-08 13:35   ` [PATCH 8/8] drm/amdgpu: use paging queue for VM page table updates Christian König
  2018-10-09  8:37   ` [PATCH 1/8] drm/amdgpu: fix incorrect use of amdgpu_irq_add_id in si_dma.c Huang Rui
  7 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-08 13:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Implement all the necessary stuff to get those extra rings working.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 324 ++++++++++++++++++++++++++++-----
 1 file changed, 274 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 55384bad7a70..a362904d73f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -427,6 +427,57 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
 	}
 }
 
+/**
+ * sdma_v4_0_page_ring_get_wptr - get the current write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Get the current wptr from the hardware (VEGA10+).
+ */
+static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	u64 wptr;
+
+	if (ring->use_doorbell) {
+		/* XXX check if swapping is necessary on BE */
+		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+	} else {
+		wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
+		wptr = wptr << 32;
+		wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
+	}
+
+	return wptr >> 2;
+}
+
+/**
+ * sdma_v4_0_page_ring_set_wptr - commit the write pointer
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Write the wptr back to the hardware (VEGA10+).
+ */
+static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring->use_doorbell) {
+		u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
+
+		/* XXX check if swapping is necessary on BE */
+		WRITE_ONCE(*wb, (ring->wptr << 2));
+		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+	} else {
+		uint64_t wptr = ring->wptr << 2;
+
+		WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
+			    lower_32_bits(wptr));
+		WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
+			    upper_32_bits(wptr));
+	}
+}
+
 static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
 	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
@@ -597,6 +648,35 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
 	/* XXX todo */
 }
 
+/**
+ * sdma_v4_0_page_stop - stop the page async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Stop the page async dma ring buffers (VEGA10).
+ */
+static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page;
+	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page;
+	u32 rb_cntl, ib_cntl;
+	int i;
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
+		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
+					RB_ENABLE, 0);
+		WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
+		ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
+		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
+					IB_ENABLE, 0);
+		WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
+	}
+
+	sdma0->ready = false;
+	sdma1->ready = false;
+}
+
 /**
  * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch
  *
@@ -664,6 +744,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
 	if (enable == false) {
 		sdma_v4_0_gfx_stop(adev);
 		sdma_v4_0_rlc_stop(adev);
+		sdma_v4_0_page_stop(adev);
 	}
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
@@ -673,6 +754,23 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
 	}
 }
 
+/**
+ * sdma_v4_0_rb_cntl - get parameters for rb_cntl
+ */
+static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
+{
+	/* Set ring buffer size in dwords */
+	uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
+
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+#ifdef __BIG_ENDIAN
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+				RPTR_WRITEBACK_SWAP_ENABLE, 1);
+#endif
+	return rb_cntl;
+}
+
 /**
  * sdma_v4_0_gfx_resume - setup and start the async dma engines
  *
@@ -686,7 +784,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 {
 	struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
 	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
-	u32 rb_bufsz;
 	u32 wb_offset;
 	u32 doorbell;
 	u32 doorbell_offset;
@@ -694,15 +791,8 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 
 	wb_offset = (ring->rptr_offs * 4);
 
-	/* Set ring buffer size in dwords */
-	rb_bufsz = order_base_2(ring->ring_size / 4);
 	rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
-	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
-#ifdef __BIG_ENDIAN
-	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
-	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
-				RPTR_WRITEBACK_SWAP_ENABLE, 1);
-#endif
+	rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
 	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
 
 	/* Initialize the ring buffer's read and write pointers */
@@ -717,7 +807,8 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
 	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
 
-	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+				RPTR_WRITEBACK_ENABLE, 1);
 
 	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
 	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
@@ -730,13 +821,11 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
 	doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
 
-	if (ring->use_doorbell) {
-		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
-		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
-				OFFSET, ring->doorbell_index);
-	} else {
-		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
-	}
+	doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
+				 ring->use_doorbell);
+	doorbell_offset = REG_SET_FIELD(doorbell_offset,
+					SDMA0_GFX_DOORBELL_OFFSET,
+					OFFSET, ring->doorbell_index);
 	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
 	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
 	adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
@@ -754,10 +843,9 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
 		    upper_32_bits(wptr_gpu_addr));
 	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
-	if (amdgpu_sriov_vf(adev))
-		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
-	else
-		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
+	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+				       SDMA0_GFX_RB_WPTR_POLL_CNTL,
+				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
 	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
 
 	/* enable DMA RB */
@@ -775,6 +863,99 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 	ring->ready = true;
 }
 
+/**
+ * sdma_v4_0_page_resume - setup and start the async dma engines
+ *
+ * @adev: amdgpu_device pointer
+ * @i: instance to resume
+ *
+ * Set up the page DMA ring buffers and enable them (VEGA10).
+ */
+static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
+{
+	struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
+	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
+	u32 wb_offset;
+	u32 doorbell;
+	u32 doorbell_offset;
+	u64 wptr_gpu_addr;
+
+	wb_offset = (ring->rptr_offs * 4);
+
+	rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
+	rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
+
+	/* set the wb address whether it's enabled or not */
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
+	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
+	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
+				RPTR_WRITEBACK_ENABLE, 1);
+
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
+
+	ring->wptr = 0;
+
+	/* before programming wptr to a smaller value, need to set minor_ptr_update first */
+	WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
+
+	doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
+	doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
+
+	doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
+				 ring->use_doorbell);
+	doorbell_offset = REG_SET_FIELD(doorbell_offset,
+					SDMA0_PAGE_DOORBELL_OFFSET,
+					OFFSET, ring->doorbell_index);
+	WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
+	WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
+	/* TODO: enable doorbell support */
+	/*adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+					      ring->doorbell_index);*/
+
+	sdma_v4_0_ring_set_wptr(ring);
+
+	/* set minor_ptr_update to 0 after wptr programed */
+	WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
+
+	/* setup the wptr shadow polling */
+	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
+		    lower_32_bits(wptr_gpu_addr));
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
+		    upper_32_bits(wptr_gpu_addr));
+	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
+	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
+				       SDMA0_PAGE_RB_WPTR_POLL_CNTL,
+				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
+
+	/* enable DMA RB */
+	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
+	WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
+
+	ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
+	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+	/* enable DMA IBs */
+	WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
+
+	ring->ready = true;
+}
+
 static void
 sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
 {
@@ -932,6 +1113,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
 
 		WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
 		sdma_v4_0_gfx_resume(adev, i);
+		sdma_v4_0_page_resume(adev, i);
 
 		/* set utc l1 enable flag always to 1 */
 		temp = RREG32_SDMA(i, mmSDMA0_CNTL);
@@ -1337,6 +1519,19 @@ static int sdma_v4_0_sw_init(void *handle)
 				     AMDGPU_SDMA_IRQ_TRAP1);
 		if (r)
 			return r;
+
+		ring = &adev->sdma.instance[i].page;
+		ring->ring_obj = NULL;
+		ring->use_doorbell = false;
+
+		sprintf(ring->name, "page%d", i);
+		r = amdgpu_ring_init(adev, ring, 1024,
+				     &adev->sdma.trap_irq,
+				     (i == 0) ?
+				     AMDGPU_SDMA_IRQ_TRAP0 :
+				     AMDGPU_SDMA_IRQ_TRAP1);
+		if (r)
+			return r;
 	}
 
 	return r;
@@ -1347,8 +1542,10 @@ static int sdma_v4_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int i;
 
-	for (i = 0; i < adev->sdma.num_instances; i++)
+	for (i = 0; i < adev->sdma.num_instances; i++) {
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+		amdgpu_ring_fini(&adev->sdma.instance[i].page);
+	}
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		release_firmware(adev->sdma.instance[i].fw);
@@ -1462,39 +1659,32 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
+	uint32_t instance;
+
 	DRM_DEBUG("IH: SDMA trap\n");
 	switch (entry->client_id) {
 	case SOC15_IH_CLIENTID_SDMA0:
-		switch (entry->ring_id) {
-		case 0:
-			amdgpu_fence_process(&adev->sdma.instance[0].ring);
-			break;
-		case 1:
-			/* XXX compute */
-			break;
-		case 2:
-			/* XXX compute */
-			break;
-		case 3:
-			/* XXX page queue*/
-			break;
-		}
+		instance = 0;
 		break;
 	case SOC15_IH_CLIENTID_SDMA1:
-		switch (entry->ring_id) {
-		case 0:
-			amdgpu_fence_process(&adev->sdma.instance[1].ring);
-			break;
-		case 1:
-			/* XXX compute */
-			break;
-		case 2:
-			/* XXX compute */
-			break;
-		case 3:
-			/* XXX page queue*/
-			break;
-		}
+		instance = 1;
+		break;
+	default:
+		return 0;
+	}
+
+	switch (entry->ring_id) {
+	case 0:
+		amdgpu_fence_process(&adev->sdma.instance[instance].ring);
+		break;
+	case 1:
+		/* XXX compute */
+		break;
+	case 2:
+		/* XXX compute */
+		break;
+	case 3:
+		amdgpu_fence_process(&adev->sdma.instance[instance].page);
 		break;
 	}
 	return 0;
@@ -1722,6 +1912,38 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
+static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
+	.type = AMDGPU_RING_TYPE_SDMA,
+	.align_mask = 0xf,
+	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
+	.support_64bit_ptrs = true,
+	.vmhub = AMDGPU_MMHUB,
+	.get_rptr = sdma_v4_0_ring_get_rptr,
+	.get_wptr = sdma_v4_0_page_ring_get_wptr,
+	.set_wptr = sdma_v4_0_page_ring_set_wptr,
+	.emit_frame_size =
+		6 + /* sdma_v4_0_ring_emit_hdp_flush */
+		3 + /* hdp invalidate */
+		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
+		/* sdma_v4_0_ring_emit_vm_flush */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
+	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
+	.emit_ib = sdma_v4_0_ring_emit_ib,
+	.emit_fence = sdma_v4_0_ring_emit_fence,
+	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
+	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
+	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
+	.test_ring = sdma_v4_0_ring_test_ring,
+	.test_ib = sdma_v4_0_ring_test_ib,
+	.insert_nop = sdma_v4_0_ring_insert_nop,
+	.pad_ib = sdma_v4_0_ring_pad_ib,
+	.emit_wreg = sdma_v4_0_ring_emit_wreg,
+	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+};
+
 static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
 {
 	int i;
@@ -1729,6 +1951,8 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
 		adev->sdma.instance[i].ring.me = i;
+		adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs;
+		adev->sdma.instance[i].page.me = i;
 	}
 }
 
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 8/8] drm/amdgpu: use paging queue for VM page table updates
       [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (5 preceding siblings ...)
  2018-10-08 13:35   ` [PATCH 7/8] drm/amdgpu: activate paging queue on SDMA v4 Christian König
@ 2018-10-08 13:35   ` Christian König
       [not found]     ` <20181008133521.3237-8-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-10-09  8:37   ` [PATCH 1/8] drm/amdgpu: fix incorrect use of amdgpu_irq_add_id in si_dma.c Huang Rui
  7 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-08 13:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Only for testing, not sure if we should keep it like this.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index a362904d73f7..5fa80b231da3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2052,7 +2052,7 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
 
 	adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		sched = &adev->sdma.instance[i].ring.sched;
+		sched = &adev->sdma.instance[i].page.sched;
 		adev->vm_manager.vm_pte_rqs[i] =
 			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
 	}
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/8] drm/amdgpu: fix incorrect use of amdgpu_irq_add_id in si_dma.c
       [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (6 preceding siblings ...)
  2018-10-08 13:35   ` [PATCH 8/8] drm/amdgpu: use paging queue for VM page table updates Christian König
@ 2018-10-09  8:37   ` Huang Rui
  7 siblings, 0 replies; 25+ messages in thread
From: Huang Rui @ 2018-10-09  8:37 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Oct 08, 2018 at 03:35:14PM +0200, Christian König wrote:
> Adding a second irq source because of a different src_id is actually a
> bug.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  4 ----
>  drivers/gpu/drm/amd/amdgpu/si_dma.c      | 27 ++++++++-------------------
>  2 files changed, 8 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index d17503f0df8e..500113ec65ca 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -46,10 +46,6 @@ struct amdgpu_sdma_instance {
>  
>  struct amdgpu_sdma {
>  	struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
> -#ifdef CONFIG_DRM_AMDGPU_SI
> -	//SI DMA has a difference trap irq number for the second engine
> -	struct amdgpu_irq_src	trap_irq_1;
> -#endif
>  	struct amdgpu_irq_src	trap_irq;
>  	struct amdgpu_irq_src	illegal_inst_irq;
>  	int			num_instances;
> diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> index d4ceaf440f26..adbaea6da0d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
> @@ -502,12 +502,14 @@ static int si_dma_sw_init(void *handle)
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  
>  	/* DMA0 trap event */
> -	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq);
> +	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224,
> +			      &adev->sdma.trap_irq);
>  	if (r)
>  		return r;
>  
>  	/* DMA1 trap event */
> -	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244, &adev->sdma.trap_irq_1);
> +	r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244,
> +			      &adev->sdma.trap_irq);
>  	if (r)
>  		return r;
>  
> @@ -649,17 +651,10 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev,
>  				      struct amdgpu_irq_src *source,
>  				      struct amdgpu_iv_entry *entry)
>  {
> -	amdgpu_fence_process(&adev->sdma.instance[0].ring);
> -
> -	return 0;
> -}
> -
> -static int si_dma_process_trap_irq_1(struct amdgpu_device *adev,
> -				      struct amdgpu_irq_src *source,
> -				      struct amdgpu_iv_entry *entry)
> -{
> -	amdgpu_fence_process(&adev->sdma.instance[1].ring);
> -
> +	if (entry->src_id == 224)
> +		amdgpu_fence_process(&adev->sdma.instance[0].ring);
> +	else
> +		amdgpu_fence_process(&adev->sdma.instance[1].ring);
>  	return 0;
>  }
>  
> @@ -786,11 +781,6 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = {
>  	.process = si_dma_process_trap_irq,
>  };
>  
> -static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs_1 = {
> -	.set = si_dma_set_trap_irq_state,
> -	.process = si_dma_process_trap_irq_1,
> -};
> -
>  static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = {
>  	.process = si_dma_process_illegal_inst_irq,
>  };
> @@ -799,7 +789,6 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
>  {
>  	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
>  	adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs;
> -	adev->sdma.trap_irq_1.funcs = &si_dma_trap_irq_funcs_1;
>  	adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs;
>  }
>  
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found]     ` <20181008133521.3237-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-09  9:17       ` Huang Rui
  2018-10-09 10:56         ` Christian König
  0 siblings, 1 reply; 25+ messages in thread
From: Huang Rui @ 2018-10-09  9:17 UTC (permalink / raw)
  To: Christian König, Monk Liu, Frank Min
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
> Under SRIOV we were enabling the ring buffer before it was initialized.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 234 ++++++++++++++++-----------------
>  1 file changed, 116 insertions(+), 118 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index c20d413f277c..5ecf6c9252c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -673,13 +673,14 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
>   * sdma_v4_0_gfx_resume - setup and start the async dma engines
>   *
>   * @adev: amdgpu_device pointer
> + * @i: instance to resume
>   *
>   * Set up the gfx DMA ring buffers and enable them (VEGA10).
>   * Returns 0 for success, error for failure.
>   */
> -static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
> +static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  {
> -	struct amdgpu_ring *ring;
> +	struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
>  	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
>  	u32 rb_bufsz;
>  	u32 wb_offset;
> @@ -687,129 +688,108 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
>  	u32 doorbell_offset;
>  	u32 temp;
>  	u64 wptr_gpu_addr;
> -	int i, r;
>  
> -	for (i = 0; i < adev->sdma.num_instances; i++) {
> -		ring = &adev->sdma.instance[i].ring;
> -		wb_offset = (ring->rptr_offs * 4);
> +	wb_offset = (ring->rptr_offs * 4);
>  
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
>  
> -		/* Set ring buffer size in dwords */
> -		rb_bufsz = order_base_2(ring->ring_size / 4);
> -		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
> -		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
> +	/* Set ring buffer size in dwords */
> +	rb_bufsz = order_base_2(ring->ring_size / 4);
> +	rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
>  #ifdef __BIG_ENDIAN
> -		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
> -		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> -					RPTR_WRITEBACK_SWAP_ENABLE, 1);
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> +				RPTR_WRITEBACK_SWAP_ENABLE, 1);
>  #endif
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
>  
> -		/* Initialize the ring buffer's read and write pointers */
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
> +	/* Initialize the ring buffer's read and write pointers */
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
>  
> -		/* set the wb address whether it's enabled or not */
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
> -		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
> -		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
> +	/* set the wb address whether it's enabled or not */
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
> +	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
> +	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
>  
> -		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
>  
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
>  
> -		ring->wptr = 0;
> +	ring->wptr = 0;
>  
> -		/* before programing wptr to a less value, need set minor_ptr_update first */
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> +	/* before programing wptr to a less value, need set minor_ptr_update first */
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>  
> -		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
> -			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> -			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> -		}
> +	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
> +		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> +		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> +	}
>  
> -		doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> -		doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
> +	doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> +	doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
>  
> -		if (ring->use_doorbell) {
> -			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
> -			doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
> -					OFFSET, ring->doorbell_index);
> -		} else {
> -			doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
> -		}
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
> -		adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
> -						      ring->doorbell_index);
> -
> -		if (amdgpu_sriov_vf(adev))
> -			sdma_v4_0_ring_set_wptr(ring);
> -
> -		/* set minor_ptr_update to 0 after wptr programed */
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
> -
> -		/* set utc l1 enable flag always to 1 */
> -		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
> -		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
> -
> -		if (!amdgpu_sriov_vf(adev)) {
> -			/* unhalt engine */
> -			temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
> -			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
> -			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
> -		}
> +	if (ring->use_doorbell) {
> +		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
> +		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
> +				OFFSET, ring->doorbell_index);
> +	} else {
> +		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
> +	}
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
> +	adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
> +					      ring->doorbell_index);
>  
> -		/* setup the wptr shadow polling */
> -		wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
> -		       lower_32_bits(wptr_gpu_addr));
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
> -		       upper_32_bits(wptr_gpu_addr));
> -		wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
> -		if (amdgpu_sriov_vf(adev))
> -			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
> -		else
> -			wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
> +	if (amdgpu_sriov_vf(adev))
> +		sdma_v4_0_ring_set_wptr(ring);
>  
> -		/* enable DMA RB */
> -		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
> +	/* set minor_ptr_update to 0 after wptr programed */
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
>  
> -		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
> -		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
> -#ifdef __BIG_ENDIAN
> -		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
> -#endif
> -		/* enable DMA IBs */
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
> +	/* set utc l1 enable flag always to 1 */
> +	temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
> +	temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
>  
> -		ring->ready = true;
> +	if (!amdgpu_sriov_vf(adev)) {
> +		/* unhalt engine */
> +		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
> +		temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
> +		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
> +	}
>  
> -		if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
> -			sdma_v4_0_ctx_switch_enable(adev, true);
> -			sdma_v4_0_enable(adev, true);
> -		}
> +	/* setup the wptr shadow polling */
> +	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
> +	       lower_32_bits(wptr_gpu_addr));
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
> +	       upper_32_bits(wptr_gpu_addr));
> +	wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
> +	if (amdgpu_sriov_vf(adev))
> +		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
> +	else
> +		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
>  
> -		r = amdgpu_ring_test_ring(ring);
> -		if (r) {
> -			ring->ready = false;
> -			return r;
> -		}
> +	/* enable DMA RB */
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
>  
> -		if (adev->mman.buffer_funcs_ring == ring)
> -			amdgpu_ttm_set_buffer_funcs_status(adev, true);
> -
> -	}
> +	ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
> +	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
> +#ifdef __BIG_ENDIAN
> +	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
> +#endif
> +	/* enable DMA IBs */
> +	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
>  
> -	return 0;
> +	ring->ready = true;
>  }
>  
>  static void
> @@ -941,33 +921,51 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
>   */
>  static int sdma_v4_0_start(struct amdgpu_device *adev)
>  {
> -	int r = 0;
> +	struct amdgpu_ring *ring;
> +	int i, r;
>  
>  	if (amdgpu_sriov_vf(adev)) {
>  		sdma_v4_0_ctx_switch_enable(adev, false);
>  		sdma_v4_0_enable(adev, false);
> +	} else {
>  
> -		/* set RB registers */
> -		r = sdma_v4_0_gfx_resume(adev);
> -		return r;
> +		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
> +			r = sdma_v4_0_load_microcode(adev);
> +			if (r)
> +				return r;
> +		}
> +
> +		/* unhalt the MEs */
> +		sdma_v4_0_enable(adev, true);
> +		/* enable sdma ring preemption */
> +		sdma_v4_0_ctx_switch_enable(adev, true);
>  	}
>  
> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
> -		r = sdma_v4_0_load_microcode(adev);
> +	/* start the gfx rings and rlc compute queues */
> +	for (i = 0; i < adev->sdma.num_instances; i++)
> +		sdma_v4_0_gfx_resume(adev, i);
> +
> +	if (amdgpu_sriov_vf(adev)) {
> +		sdma_v4_0_ctx_switch_enable(adev, true);
> +		sdma_v4_0_enable(adev, true);
> +	} else {
> +		r = sdma_v4_0_rlc_resume(adev);
>  		if (r)
>  			return r;
>  	}

+ Monk, Frank,

I probably cannot judge here; under SRIOV, I saw you disable the ctx switch
before. Do you have any concerns if we enable it here?

Otherwise, it looks good to me. Christian, may we know which kind of jobs will
use the SDMA page queue (ring)? You know, we only had the SDMA gfx queue (ring) before.

Thanks,
Ray

>  
> -	/* unhalt the MEs */
> -	sdma_v4_0_enable(adev, true);
> -	/* enable sdma ring preemption */
> -	sdma_v4_0_ctx_switch_enable(adev, true);
> +	for (i = 0; i < adev->sdma.num_instances; i++) {
> +		ring = &adev->sdma.instance[i].ring;
>  
> -	/* start the gfx rings and rlc compute queues */
> -	r = sdma_v4_0_gfx_resume(adev);
> -	if (r)
> -		return r;
> -	r = sdma_v4_0_rlc_resume(adev);
> +		r = amdgpu_ring_test_ring(ring);
> +		if (r) {
> +			ring->ready = false;
> +			return r;
> +		}
> +
> +		if (adev->mman.buffer_funcs_ring == ring)
> +			amdgpu_ttm_set_buffer_funcs_status(adev, true);
> +	}
>  
>  	return r;
>  }
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 3/8] drm/amdgpu: add basics for SDMA page queue support
       [not found]     ` <20181008133521.3237-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-09  9:31       ` Huang Rui
  0 siblings, 0 replies; 25+ messages in thread
From: Huang Rui @ 2018-10-09  9:31 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Oct 08, 2018 at 03:35:16PM +0200, Christian König wrote:
> Just the common helper and a new ring in the SDMA instance.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 10 ++++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
>  2 files changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index bc9244b429ef..0fb9907494bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -34,11 +34,9 @@ struct amdgpu_sdma_instance * amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>  	int i;
>  
>  	for (i = 0; i < adev->sdma.num_instances; i++)
> -		if (&adev->sdma.instance[i].ring == ring)
> -			break;
> +		if (ring == &adev->sdma.instance[i].ring ||
> +		    ring == &adev->sdma.instance[i].page)
> +			return &adev->sdma.instance[i];
>  
> -	if (i < AMDGPU_MAX_SDMA_INSTANCES)
> -		return &adev->sdma.instance[i];
> -	else
> -		return NULL;
> +	return NULL;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index 500113ec65ca..556db42edaed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -41,6 +41,7 @@ struct amdgpu_sdma_instance {
>  	uint32_t		feature_version;
>  
>  	struct amdgpu_ring	ring;
> +	struct amdgpu_ring	page;
>  	bool			burst_nop;
>  };
>  
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 4/8] drm/amdgpu: remove non gfx specific handling from sdma_v4_0_gfx_resume
       [not found]     ` <20181008133521.3237-4-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-09  9:34       ` Huang Rui
  0 siblings, 0 replies; 25+ messages in thread
From: Huang Rui @ 2018-10-09  9:34 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Oct 08, 2018 at 03:35:17PM +0200, Christian König wrote:
> Needed to start using the paging queue.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 36 +++++++++++++++++++---------------
>  1 file changed, 20 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 5ecf6c9252c4..1124b45d166d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -686,13 +686,10 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  	u32 wb_offset;
>  	u32 doorbell;
>  	u32 doorbell_offset;
> -	u32 temp;
>  	u64 wptr_gpu_addr;
>  
>  	wb_offset = (ring->rptr_offs * 4);
>  
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
> -
>  	/* Set ring buffer size in dwords */
>  	rb_bufsz = order_base_2(ring->ring_size / 4);
>  	rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
> @@ -752,18 +749,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  	/* set minor_ptr_update to 0 after wptr programed */
>  	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
>  
> -	/* set utc l1 enable flag always to 1 */
> -	temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
> -	temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
> -
> -	if (!amdgpu_sriov_vf(adev)) {
> -		/* unhalt engine */
> -		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
> -		temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
> -	}
> -
>  	/* setup the wptr shadow polling */
>  	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
>  	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
> @@ -942,9 +927,28 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
>  	}
>  
>  	/* start the gfx rings and rlc compute queues */
> -	for (i = 0; i < adev->sdma.num_instances; i++)
> +	for (i = 0; i < adev->sdma.num_instances; i++) {
> +		uint32_t temp;
> +
> +		WREG32(sdma_v4_0_get_reg_offset(adev, i,
> +				mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
>  		sdma_v4_0_gfx_resume(adev, i);
>  
> +		/* set utc l1 enable flag always to 1 */
> +		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
> +		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
> +		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
> +
> +		if (!amdgpu_sriov_vf(adev)) {
> +			/* unhalt engine */
> +			temp = RREG32(sdma_v4_0_get_reg_offset(adev, i,
> +					mmSDMA0_F32_CNTL));
> +			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
> +			WREG32(sdma_v4_0_get_reg_offset(adev, i,
> +					mmSDMA0_F32_CNTL), temp);
> +		}
> +	}
> +
>  	if (amdgpu_sriov_vf(adev)) {
>  		sdma_v4_0_ctx_switch_enable(adev, true);
>  		sdma_v4_0_enable(adev, true);
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 5/8] drm/amdgpu: remove SRIOV specific handling from sdma_v4_0_gfx_resume
       [not found]     ` <20181008133521.3237-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-09  9:35       ` Huang Rui
  0 siblings, 0 replies; 25+ messages in thread
From: Huang Rui @ 2018-10-09  9:35 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Oct 08, 2018 at 03:35:18PM +0200, Christian König wrote:
> Just use the same code path for both SRIOV and bare metal.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 8 +-------
>  1 file changed, 1 insertion(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 1124b45d166d..61da9b862ede 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -723,11 +723,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  	/* before programing wptr to a less value, need set minor_ptr_update first */
>  	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
>  
> -	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> -	}
> -
>  	doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
>  	doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
>  
> @@ -743,8 +738,7 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  	adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
>  					      ring->doorbell_index);
>  
> -	if (amdgpu_sriov_vf(adev))
> -		sdma_v4_0_ring_set_wptr(ring);
> +	sdma_v4_0_ring_set_wptr(ring);
>  
>  	/* set minor_ptr_update to 0 after wptr programed */
>  	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 6/8] drm/amdgpu: add some [WR]REG32_SDMA macros to sdma_v4_0.c
       [not found]     ` <20181008133521.3237-6-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-09  9:36       ` Huang Rui
  0 siblings, 0 replies; 25+ messages in thread
From: Huang Rui @ 2018-10-09  9:36 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Oct 08, 2018 at 03:35:19PM +0200, Christian König wrote:
> Significantly shortens the code.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 126 ++++++++++++++++-----------------
>  1 file changed, 63 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 61da9b862ede..55384bad7a70 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -54,6 +54,11 @@ MODULE_FIRMWARE("amdgpu/raven2_sdma.bin");
>  #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK  0x000000F8L
>  #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L
>  
> +#define WREG32_SDMA(instance, offset, value) \
> +	WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value)
> +#define RREG32_SDMA(instance, offset) \
> +	RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)))
> +
>  static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev);
>  static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev);
>  static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev);
> @@ -367,8 +372,8 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
>  	} else {
>  		u32 lowbit, highbit;
>  
> -		lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2;
> -		highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
> +		lowbit = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR) >> 2;
> +		highbit = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI) >> 2;
>  
>  		DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
>  				ring->me, highbit, lowbit);
> @@ -415,8 +420,10 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
>  				lower_32_bits(ring->wptr << 2),
>  				ring->me,
>  				upper_32_bits(ring->wptr << 2));
> -		WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
> -		WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
> +		WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR,
> +			    lower_32_bits(ring->wptr << 2));
> +		WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI,
> +			    upper_32_bits(ring->wptr << 2));
>  	}
>  }
>  
> @@ -566,12 +573,12 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
>  			amdgpu_ttm_set_buffer_funcs_status(adev, false);
>  
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
> -		rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
> +		rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
>  		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
> -		ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
> +		WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
> +		ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
>  		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
> +		WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
>  	}
>  
>  	sdma0->ready = false;
> @@ -628,18 +635,15 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
>  	}
>  
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
> -		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
> +		f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL);
>  		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
>  				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
>  		if (enable && amdgpu_sdma_phase_quantum) {
> -			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
> -			       phase_quantum);
> -			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
> -			       phase_quantum);
> -			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
> -			       phase_quantum);
> +			WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum);
> +			WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum);
> +			WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum);
>  		}
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
> +		WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl);
>  	}
>  
>  }
> @@ -663,9 +667,9 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
>  	}
>  
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
> -		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
> +		f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
>  		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
> +		WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl);
>  	}
>  }
>  
> @@ -692,39 +696,39 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  
>  	/* Set ring buffer size in dwords */
>  	rb_bufsz = order_base_2(ring->ring_size / 4);
> -	rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
> +	rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
>  	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
>  #ifdef __BIG_ENDIAN
>  	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
>  	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
>  				RPTR_WRITEBACK_SWAP_ENABLE, 1);
>  #endif
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
>  
>  	/* Initialize the ring buffer's read and write pointers */
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0);
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0);
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0);
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0);
>  
>  	/* set the wb address whether it's enabled or not */
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI,
>  	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
>  	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
>  
>  	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
>  
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
>  
>  	ring->wptr = 0;
>  
>  	/* before programing wptr to a less value, need set minor_ptr_update first */
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> +	WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1);
>  
> -	doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
> -	doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
> +	doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
> +	doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
>  
>  	if (ring->use_doorbell) {
>  		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
> @@ -733,40 +737,40 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  	} else {
>  		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
>  	}
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
> +	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
> +	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
>  	adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
>  					      ring->doorbell_index);
>  
>  	sdma_v4_0_ring_set_wptr(ring);
>  
>  	/* set minor_ptr_update to 0 after wptr programed */
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
> +	WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0);
>  
>  	/* setup the wptr shadow polling */
>  	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
> -	       lower_32_bits(wptr_gpu_addr));
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
> -	       upper_32_bits(wptr_gpu_addr));
> -	wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO,
> +		    lower_32_bits(wptr_gpu_addr));
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
> +		    upper_32_bits(wptr_gpu_addr));
> +	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
>  	if (amdgpu_sriov_vf(adev))
>  		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
>  	else
>  		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
>  
>  	/* enable DMA RB */
>  	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
> +	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
>  
> -	ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
> +	ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
>  	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
>  #ifdef __BIG_ENDIAN
>  	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
>  #endif
>  	/* enable DMA IBs */
> -	WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
> +	WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
>  
>  	ring->ready = true;
>  }
> @@ -879,12 +883,14 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
>  			(adev->sdma.instance[i].fw->data +
>  				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
>  
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0);
> +		WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0);
>  
>  		for (j = 0; j < fw_size; j++)
> -			WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++));
> +			WREG32_SDMA(i, mmSDMA0_UCODE_DATA,
> +				    le32_to_cpup(fw_data++));
>  
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version);
> +		WREG32_SDMA(i, mmSDMA0_UCODE_ADDR,
> +			    adev->sdma.instance[i].fw_version);
>  	}
>  
>  	return 0;
> @@ -924,22 +930,19 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
>  		uint32_t temp;
>  
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i,
> -				mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
> +		WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
>  		sdma_v4_0_gfx_resume(adev, i);
>  
>  		/* set utc l1 enable flag always to 1 */
> -		temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
> +		temp = RREG32_SDMA(i, mmSDMA0_CNTL);
>  		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
> -		WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
> +		WREG32_SDMA(i, mmSDMA0_CNTL, temp);
>  
>  		if (!amdgpu_sriov_vf(adev)) {
>  			/* unhalt engine */
> -			temp = RREG32(sdma_v4_0_get_reg_offset(adev, i,
> -					mmSDMA0_F32_CNTL));
> +			temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL);
>  			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
> -			WREG32(sdma_v4_0_get_reg_offset(adev, i,
> -					mmSDMA0_F32_CNTL), temp);
> +			WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp);
>  		}
>  	}
>  
> @@ -1406,7 +1409,7 @@ static bool sdma_v4_0_is_idle(void *handle)
>  	u32 i;
>  
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
> -		u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG));
> +		u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG);
>  
>  		if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
>  			return false;
> @@ -1422,8 +1425,8 @@ static int sdma_v4_0_wait_for_idle(void *handle)
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  
>  	for (i = 0; i < adev->usec_timeout; i++) {
> -		sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG));
> -		sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG));
> +		sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG);
> +		sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG);
>  
>  		if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK)
>  			return 0;
> @@ -1444,16 +1447,13 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev,
>  					unsigned type,
>  					enum amdgpu_interrupt_state state)
>  {
> +	unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1;
>  	u32 sdma_cntl;
>  
> -	u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ?
> -		sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) :
> -		sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL);
> -
> -	sdma_cntl = RREG32(reg_offset);
> +	sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL);
>  	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE,
>  		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
> -	WREG32(reg_offset, sdma_cntl);
> +	WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl);
>  
>  	return 0;
>  }
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 7/8] drm/amdgpu: activate paging queue on SDMA v4
       [not found]     ` <20181008133521.3237-7-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-09  9:40       ` Huang Rui
  0 siblings, 0 replies; 25+ messages in thread
From: Huang Rui @ 2018-10-09  9:40 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Oct 08, 2018 at 03:35:20PM +0200, Christian König wrote:
> Implement all the necessary stuff to get those extra rings working.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Reviewed-by: Huang Rui <ray.huang@amd.com>

We have a four-queue architecture; currently, counting the page queue, we only
use two. Is there any use case where we would also need to activate rlc0/rlc1?

Thanks,
Ray

> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 324 ++++++++++++++++++++++++++++-----
>  1 file changed, 274 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 55384bad7a70..a362904d73f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -427,6 +427,57 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
>  	}
>  }
>  
> +/**
> + * sdma_v4_0_page_ring_get_wptr - get the current write pointer
> + *
> + * @ring: amdgpu ring pointer
> + *
> + * Get the current wptr from the hardware (VEGA10+).
> + */
> +static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +	u64 wptr;
> +
> +	if (ring->use_doorbell) {
> +		/* XXX check if swapping is necessary on BE */
> +		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
> +	} else {
> +		wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI);
> +		wptr = wptr << 32;
> +		wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR);
> +	}
> +
> +	return wptr >> 2;
> +}
> +
> +/**
> + * sdma_v4_0_ring_set_wptr - commit the write pointer
> + *
> + * @ring: amdgpu ring pointer
> + *
> + * Write the wptr back to the hardware (VEGA10+).
> + */
> +static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
> +{
> +	struct amdgpu_device *adev = ring->adev;
> +
> +	if (ring->use_doorbell) {
> +		u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];
> +
> +		/* XXX check if swapping is necessary on BE */
> +		WRITE_ONCE(*wb, (ring->wptr << 2));
> +		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
> +	} else {
> +		uint64_t wptr = ring->wptr << 2;
> +
> +		WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR,
> +			    lower_32_bits(wptr));
> +		WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI,
> +			    upper_32_bits(wptr));
> +	}
> +}
> +
>  static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>  {
>  	struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
> @@ -597,6 +648,35 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
>  	/* XXX todo */
>  }
>  
> +/**
> + * sdma_v4_0_page_stop - stop the page async dma engines
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Stop the page async dma ring buffers (VEGA10).
> + */
> +static void sdma_v4_0_page_stop(struct amdgpu_device *adev)
> +{
> +	struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page;
> +	struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page;
> +	u32 rb_cntl, ib_cntl;
> +	int i;
> +
> +	for (i = 0; i < adev->sdma.num_instances; i++) {
> +		rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
> +		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
> +					RB_ENABLE, 0);
> +		WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
> +		ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
> +		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL,
> +					IB_ENABLE, 0);
> +		WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
> +	}
> +
> +	sdma0->ready = false;
> +	sdma1->ready = false;
> +}
> +
>  /**
>   * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch
>   *
> @@ -664,6 +744,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
>  	if (enable == false) {
>  		sdma_v4_0_gfx_stop(adev);
>  		sdma_v4_0_rlc_stop(adev);
> +		sdma_v4_0_page_stop(adev);
>  	}
>  
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
> @@ -673,6 +754,23 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
>  	}
>  }
>  
> +/**
> + * sdma_v4_0_rb_cntl - get parameters for rb_cntl
> + */
> +static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
> +{
> +	/* Set ring buffer size in dwords */
> +	uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);
> +
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
> +#ifdef __BIG_ENDIAN
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> +				RPTR_WRITEBACK_SWAP_ENABLE, 1);
> +#endif
> +	return rb_cntl;
> +}
> +
>  /**
>   * sdma_v4_0_gfx_resume - setup and start the async dma engines
>   *
> @@ -686,7 +784,6 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  {
>  	struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
>  	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
> -	u32 rb_bufsz;
>  	u32 wb_offset;
>  	u32 doorbell;
>  	u32 doorbell_offset;
> @@ -694,15 +791,8 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  
>  	wb_offset = (ring->rptr_offs * 4);
>  
> -	/* Set ring buffer size in dwords */
> -	rb_bufsz = order_base_2(ring->ring_size / 4);
>  	rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
> -	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
> -#ifdef __BIG_ENDIAN
> -	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
> -	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> -				RPTR_WRITEBACK_SWAP_ENABLE, 1);
> -#endif
> +	rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
>  	WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
>  
>  	/* Initialize the ring buffer's read and write pointers */
> @@ -717,7 +807,8 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  	WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO,
>  	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
>  
> -	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> +				RPTR_WRITEBACK_ENABLE, 1);
>  
>  	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8);
>  	WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40);
> @@ -730,13 +821,11 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  	doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL);
>  	doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET);
>  
> -	if (ring->use_doorbell) {
> -		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
> -		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
> -				OFFSET, ring->doorbell_index);
> -	} else {
> -		doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
> -	}
> +	doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE,
> +				 ring->use_doorbell);
> +	doorbell_offset = REG_SET_FIELD(doorbell_offset,
> +					SDMA0_GFX_DOORBELL_OFFSET,
> +					OFFSET, ring->doorbell_index);
>  	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell);
>  	WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset);
>  	adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
> @@ -754,10 +843,9 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI,
>  		    upper_32_bits(wptr_gpu_addr));
>  	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL);
> -	if (amdgpu_sriov_vf(adev))
> -		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
> -	else
> -		wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
> +	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> +				       SDMA0_GFX_RB_WPTR_POLL_CNTL,
> +				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
>  	WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
>  
>  	/* enable DMA RB */
> @@ -775,6 +863,99 @@ static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
>  	ring->ready = true;
>  }
>  
> +/**
> + * sdma_v4_0_page_resume - setup and start the async dma engines
> + *
> + * @adev: amdgpu_device pointer
> + * @i: instance to resume
> + *
> + * Set up the page DMA ring buffers and enable them (VEGA10).
> + * Returns 0 for success, error for failure.
> + */
> +static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i)
> +{
> +	struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
> +	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
> +	u32 wb_offset;
> +	u32 doorbell;
> +	u32 doorbell_offset;
> +	u64 wptr_gpu_addr;
> +
> +	wb_offset = (ring->rptr_offs * 4);
> +
> +	rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL);
> +	rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
> +
> +	/* Initialize the ring buffer's read and write pointers */
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0);
> +
> +	/* set the wb address whether it's enabled or not */
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI,
> +	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO,
> +	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
> +
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL,
> +				RPTR_WRITEBACK_ENABLE, 1);
> +
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);
> +
> +	ring->wptr = 0;
> +
> +	/* before programing wptr to a less value, need set minor_ptr_update first */
> +	WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1);
> +
> +	doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL);
> +	doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET);
> +
> +	doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE,
> +				 ring->use_doorbell);
> +	doorbell_offset = REG_SET_FIELD(doorbell_offset,
> +					SDMA0_PAGE_DOORBELL_OFFSET,
> +					OFFSET, ring->doorbell_index);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset);
> +	/* TODO: enable doorbell support */
> +	/*adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
> +					      ring->doorbell_index);*/
> +
> +	sdma_v4_0_ring_set_wptr(ring);
> +
> +	/* set minor_ptr_update to 0 after wptr programed */
> +	WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0);
> +
> +	/* setup the wptr shadow polling */
> +	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO,
> +		    lower_32_bits(wptr_gpu_addr));
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI,
> +		    upper_32_bits(wptr_gpu_addr));
> +	wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL);
> +	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
> +				       SDMA0_PAGE_RB_WPTR_POLL_CNTL,
> +				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev));
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);
> +
> +	/* enable DMA RB */
> +	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1);
> +	WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl);
> +
> +	ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL);
> +	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1);
> +#ifdef __BIG_ENDIAN
> +	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
> +#endif
> +	/* enable DMA IBs */
> +	WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl);
> +
> +	ring->ready = true;
> +}
> +
>  static void
>  sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable)
>  {
> @@ -932,6 +1113,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev)
>  
>  		WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0);
>  		sdma_v4_0_gfx_resume(adev, i);
> +		sdma_v4_0_page_resume(adev, i);
>  
>  		/* set utc l1 enable flag always to 1 */
>  		temp = RREG32_SDMA(i, mmSDMA0_CNTL);
> @@ -1337,6 +1519,19 @@ static int sdma_v4_0_sw_init(void *handle)
>  				     AMDGPU_SDMA_IRQ_TRAP1);
>  		if (r)
>  			return r;
> +
> +		ring = &adev->sdma.instance[i].page;
> +		ring->ring_obj = NULL;
> +		ring->use_doorbell = false;
> +
> +		sprintf(ring->name, "page%d", i);
> +		r = amdgpu_ring_init(adev, ring, 1024,
> +				     &adev->sdma.trap_irq,
> +				     (i == 0) ?
> +				     AMDGPU_SDMA_IRQ_TRAP0 :
> +				     AMDGPU_SDMA_IRQ_TRAP1);
> +		if (r)
> +			return r;
>  	}
>  
>  	return r;
> @@ -1347,8 +1542,10 @@ static int sdma_v4_0_sw_fini(void *handle)
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  	int i;
>  
> -	for (i = 0; i < adev->sdma.num_instances; i++)
> +	for (i = 0; i < adev->sdma.num_instances; i++) {
>  		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
> +		amdgpu_ring_fini(&adev->sdma.instance[i].page);
> +	}
>  
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
>  		release_firmware(adev->sdma.instance[i].fw);
> @@ -1462,39 +1659,32 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
>  				      struct amdgpu_irq_src *source,
>  				      struct amdgpu_iv_entry *entry)
>  {
> +	uint32_t instance;
> +
>  	DRM_DEBUG("IH: SDMA trap\n");
>  	switch (entry->client_id) {
>  	case SOC15_IH_CLIENTID_SDMA0:
> -		switch (entry->ring_id) {
> -		case 0:
> -			amdgpu_fence_process(&adev->sdma.instance[0].ring);
> -			break;
> -		case 1:
> -			/* XXX compute */
> -			break;
> -		case 2:
> -			/* XXX compute */
> -			break;
> -		case 3:
> -			/* XXX page queue*/
> -			break;
> -		}
> +		instance = 0;
>  		break;
>  	case SOC15_IH_CLIENTID_SDMA1:
> -		switch (entry->ring_id) {
> -		case 0:
> -			amdgpu_fence_process(&adev->sdma.instance[1].ring);
> -			break;
> -		case 1:
> -			/* XXX compute */
> -			break;
> -		case 2:
> -			/* XXX compute */
> -			break;
> -		case 3:
> -			/* XXX page queue*/
> -			break;
> -		}
> +		instance = 1;
> +		break;
> +	default:
> +		return 0;
> +	}
> +
> +	switch (entry->ring_id) {
> +	case 0:
> +		amdgpu_fence_process(&adev->sdma.instance[instance].ring);
> +		break;
> +	case 1:
> +		/* XXX compute */
> +		break;
> +	case 2:
> +		/* XXX compute */
> +		break;
> +	case 3:
> +		amdgpu_fence_process(&adev->sdma.instance[instance].page);
>  		break;
>  	}
>  	return 0;
> @@ -1722,6 +1912,38 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
>  	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
>  };
>  
> +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
> +	.type = AMDGPU_RING_TYPE_SDMA,
> +	.align_mask = 0xf,
> +	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
> +	.support_64bit_ptrs = true,
> +	.vmhub = AMDGPU_MMHUB,
> +	.get_rptr = sdma_v4_0_ring_get_rptr,
> +	.get_wptr = sdma_v4_0_page_ring_get_wptr,
> +	.set_wptr = sdma_v4_0_page_ring_set_wptr,
> +	.emit_frame_size =
> +		6 + /* sdma_v4_0_ring_emit_hdp_flush */
> +		3 + /* hdp invalidate */
> +		6 + /* sdma_v4_0_ring_emit_pipeline_sync */
> +		/* sdma_v4_0_ring_emit_vm_flush */
> +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
> +	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
> +	.emit_ib = sdma_v4_0_ring_emit_ib,
> +	.emit_fence = sdma_v4_0_ring_emit_fence,
> +	.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
> +	.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
> +	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
> +	.test_ring = sdma_v4_0_ring_test_ring,
> +	.test_ib = sdma_v4_0_ring_test_ib,
> +	.insert_nop = sdma_v4_0_ring_insert_nop,
> +	.pad_ib = sdma_v4_0_ring_pad_ib,
> +	.emit_wreg = sdma_v4_0_ring_emit_wreg,
> +	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
> +};
> +
>  static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
>  {
>  	int i;
> @@ -1729,6 +1951,8 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
>  		adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs;
>  		adev->sdma.instance[i].ring.me = i;
> +		adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs;
> +		adev->sdma.instance[i].page.me = i;
>  	}
>  }
>  
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 8/8] drm/amdgpu: use paging queue for VM page table updates
       [not found]     ` <20181008133521.3237-8-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-09  9:43       ` Huang Rui
  0 siblings, 0 replies; 25+ messages in thread
From: Huang Rui @ 2018-10-09  9:43 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Oct 08, 2018 at 03:35:21PM +0200, Christian König wrote:
> Only for testing, not sure if we should keep it like this.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

OK, I see. Page queue is for page table update. We might need more testing
for this change.

Reviewed-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index a362904d73f7..5fa80b231da3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -2052,7 +2052,7 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev)
>  
>  	adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs;
>  	for (i = 0; i < adev->sdma.num_instances; i++) {
> -		sched = &adev->sdma.instance[i].ring.sched;
> +		sched = &adev->sdma.instance[i].page.sched;
>  		adev->vm_manager.vm_pte_rqs[i] =
>  			&sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL];
>  	}
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
  2018-10-09  9:17       ` Huang Rui
@ 2018-10-09 10:56         ` Christian König
       [not found]           ` <5ae6a2fe-80d6-858e-dcd2-2d44ab0b76ce-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-09 10:56 UTC (permalink / raw)
  To: Huang Rui, Monk Liu, Frank Min; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 09.10.2018 um 11:17 schrieb Huang Rui:
> On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
>> [SNIP]
>> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
>> -		r = sdma_v4_0_load_microcode(adev);
>> +	/* start the gfx rings and rlc compute queues */
>> +	for (i = 0; i < adev->sdma.num_instances; i++)
>> +		sdma_v4_0_gfx_resume(adev, i);
>> +
>> +	if (amdgpu_sriov_vf(adev)) {
>> +		sdma_v4_0_ctx_switch_enable(adev, true);
>> +		sdma_v4_0_enable(adev, true);
>> +	} else {
>> +		r = sdma_v4_0_rlc_resume(adev);
>>   		if (r)
>>   			return r;
>>   	}
> + Monk, Frank,
>
> I probably cannot judge here, under SRIOV, I saw you disable ctx switch
> before. Do you have any concern if we enabled it here.

The problem was that those calls were mixed into sdma_v4_0_gfx_resume() 
for the first SDMA instance.

What was happening is that SDMA0 was initialized and while doing so 
enabled both SDMA0 and SDMA1. So SDMA1 was starting up before the ring 
buffer was even set.

That this didn't crash was pure coincidence and is most likely also 
the reason why we ran into problems when ring buffers weren't initialized.

Regards,
Christian.

>
> Others, looks good for me. Christian, may we know which kind of jobs will
> use sdma page queue(ring), you know, we just sdma gfx queue(ring) before?
>
> Thanks,
> Ray
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found]           ` <5ae6a2fe-80d6-858e-dcd2-2d44ab0b76ce-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-10-09 11:45             ` Liu, Monk
       [not found]               ` <CY4PR1201MB024521C2A0EA4BAE7272EA6584E70-1iTaO6aE1DBfNQakwlCMTGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Liu, Monk @ 2018-10-09 11:45 UTC (permalink / raw)
  To: Koenig, Christian, Huang, Ray, Min, Frank, Ma, Sigil
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Context switch is for preemption across different queues (gfx, rlc0/1, page) under a bare-metal environment.
For SRIOV we don't need it and we haven't tested it yet, so we just disable it to make life easier; besides, since each VF shares only a 6 ms slice, there is in fact no benefit to enabling it for SRIOV ...

+ @Ma, Sigil to confirm

Hi Sigil

Do you think context switch could be enabled for a SRIOV VF? I worry that the context switch may have an internal conflict with preemption for world switch, thanks!

/Monk

-----Original Message-----
From: Christian König <ckoenig.leichtzumerken@gmail.com> 
Sent: Tuesday, October 9, 2018 6:57 PM
To: Huang, Ray <Ray.Huang@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Min, Frank <Frank.Min@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV

Am 09.10.2018 um 11:17 schrieb Huang Rui:
> On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
>> [SNIP]
>> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
>> -		r = sdma_v4_0_load_microcode(adev);
>> +	/* start the gfx rings and rlc compute queues */
>> +	for (i = 0; i < adev->sdma.num_instances; i++)
>> +		sdma_v4_0_gfx_resume(adev, i);
>> +
>> +	if (amdgpu_sriov_vf(adev)) {
>> +		sdma_v4_0_ctx_switch_enable(adev, true);
>> +		sdma_v4_0_enable(adev, true);
>> +	} else {
>> +		r = sdma_v4_0_rlc_resume(adev);
>>   		if (r)
>>   			return r;
>>   	}
> + Monk, Frank,
>
> I probably cannot judge here, under SRIOV, I saw you disable ctx 
> switch before. Do you have any concern if we enabled it here.

The problem was that those calls were mixed into sdma_v4_0_gfx_resume() for the first SDMA instance.

What was happening is that SDMA0 was initialized and while doing so enabled both SDMA0 and SDMA1. So SDMA1 was starting up before the ring buffer was even set.

That this didn't crash was pure coincidence and is most likely also the reason why we ran into problems when ring buffers weren't initialized.

Regards,
Christian.

>
> Others, looks good for me. Christian, may we know which kind of jobs 
> will use sdma page queue(ring), you know, we just sdma gfx queue(ring) before?
>
> Thanks,
> Ray
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found]               ` <CY4PR1201MB024521C2A0EA4BAE7272EA6584E70-1iTaO6aE1DBfNQakwlCMTGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2018-10-09 13:03                 ` Koenig, Christian
       [not found]                   ` <dbab4a65-d9ec-8ac7-75bb-86033de043f5-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Koenig, Christian @ 2018-10-09 13:03 UTC (permalink / raw)
  To: Liu, Monk, Huang, Ray, Min, Frank, Ma, Sigil
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi Monk,

well that doesn't make much sense to me what you say here cause context 
switching certainly is already enabled under SRIOV:

> -               if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence 
> doesn't need below to lines */
> -                       sdma_v4_0_ctx_switch_enable(adev, true);
> -                       sdma_v4_0_enable(adev, true);
> -               }

The problem is that context switching as well as the gfx ring is enabled 
for both SDMA0 and SDMA1 without initializing SDMA1.

That's most likely causing some unwanted consequences.

Christian.

Am 09.10.2018 um 13:45 schrieb Liu, Monk:
> Context switch is for preemption across different queues (gfx, rlc0/1, page) under bare-metal environment,
> For SRIOV we didn't need it and we didn't test it yet, so we just disable it to make life easier, besides since each VF share only 6 MS slice there is in fact no benefit to enable it for SRIOV ...
>
> + @Ma, Sigil to confirm
>
> Hi Sigil
>
> Do you think context switch could be enabled for SRIOV VF ?? I worry that the context switch have internal crush with preemption for world switch , thanks !
>
> /Monk
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, October 9, 2018 6:57 PM
> To: Huang, Ray <Ray.Huang@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Min, Frank <Frank.Min@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>
> Am 09.10.2018 um 11:17 schrieb Huang Rui:
>> On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
>>> [SNIP]
>>> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
>>> -		r = sdma_v4_0_load_microcode(adev);
>>> +	/* start the gfx rings and rlc compute queues */
>>> +	for (i = 0; i < adev->sdma.num_instances; i++)
>>> +		sdma_v4_0_gfx_resume(adev, i);
>>> +
>>> +	if (amdgpu_sriov_vf(adev)) {
>>> +		sdma_v4_0_ctx_switch_enable(adev, true);
>>> +		sdma_v4_0_enable(adev, true);
>>> +	} else {
>>> +		r = sdma_v4_0_rlc_resume(adev);
>>>    		if (r)
>>>    			return r;
>>>    	}
>> + Monk, Frank,
>>
>> I probably cannot judge here, under SRIOV, I saw you disable ctx
>> switch before. Do you have any concern if we enabled it here.
> The problem was that those calls where mixed into sdma_v4_0_gfx_resume() for the first SDMA instance.
>
> What was happening is that SDMA0 was initialized and while doing so enabled both SDMA0 and SDMA1. So SDMA1 was starting up before the ring buffer was even set.
>
> That this doesn't crashed was pure coincident and is most likely also the reason why we ran into problems when ring buffers weren't initialized.
>
> Regards,
> Christian.
>
>> Others, looks good for me. Christian, may we know which kind of jobs
>> will use sdma page queue(ring), you know, we just sdma gfx queue(ring) before?
>>
>> Thanks,
>> Ray
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found]                   ` <dbab4a65-d9ec-8ac7-75bb-86033de043f5-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-10  6:53                     ` Liu, Monk
       [not found]                       ` <CY4PR1201MB0245F26FFD7EE7558A7401B984E00-1iTaO6aE1DBfNQakwlCMTGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Liu, Monk @ 2018-10-10  6:53 UTC (permalink / raw)
  To: Koenig, Christian, Huang, Ray, Min, Frank, Ma, Sigil
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Oh, that means I remembered it the wrong way around; from the code it looks like we need to enable ctx_switch to support WORLD SWITCH for the SDMA engine

But better let Sigil confirm it  ...

Hi @Ma, Sigil can you confirm it ? what's the relationship between ctx_switch and world switch for SDMA engines ? 

Ctx_switch_enable() will set "SDMA0/1_CNTL's field: AUTO_CTXSW_ENABLE" to 1; can you tell us what it's for and how it interacts with the SRIOV world switch ? 

Thanks 

/Monk

-----Original Message-----
From: Koenig, Christian 
Sent: Tuesday, October 9, 2018 9:03 PM
To: Liu, Monk <Monk.Liu@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV

Hi Monk,

well, what you say here doesn't make much sense to me, because context switching certainly is already enabled under SRIOV:

> -               if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence 
> doesn't need below to lines */
> -                       sdma_v4_0_ctx_switch_enable(adev, true);
> -                       sdma_v4_0_enable(adev, true);
> -               }

The problem is that context switching as well as the gfx ring is enabled for both SDMA0 and SDMA1 without initializing SDMA1.

That's most likely causing some unwanted consequences.

Christian.

Am 09.10.2018 um 13:45 schrieb Liu, Monk:
> Context switch is for preemption across different queues (gfx, rlc0/1, 
> page) under bare-metal environment, For SRIOV we didn't need it and we didn't test it yet, so we just disable it to make life easier, besides since each VF share only 6 MS slice there is in fact no benefit to enable it for SRIOV ...
>
> + @Ma, Sigil to confirm
>
> Hi Sigil
>
> Do you think context switch could be enabled for SRIOV VF ?? I worry that the context switch have internal crush with preemption for world switch , thanks !
>
> /Monk
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, October 9, 2018 6:57 PM
> To: Huang, Ray <Ray.Huang@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Min, 
> Frank <Frank.Min@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>
> Am 09.10.2018 um 11:17 schrieb Huang Rui:
>> On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
>>> [SNIP]
>>> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
>>> -		r = sdma_v4_0_load_microcode(adev);
>>> +	/* start the gfx rings and rlc compute queues */
>>> +	for (i = 0; i < adev->sdma.num_instances; i++)
>>> +		sdma_v4_0_gfx_resume(adev, i);
>>> +
>>> +	if (amdgpu_sriov_vf(adev)) {
>>> +		sdma_v4_0_ctx_switch_enable(adev, true);
>>> +		sdma_v4_0_enable(adev, true);
>>> +	} else {
>>> +		r = sdma_v4_0_rlc_resume(adev);
>>>    		if (r)
>>>    			return r;
>>>    	}
>> + Monk, Frank,
>>
>> I probably cannot judge here, under SRIOV, I saw you disable ctx 
>> switch before. Do you have any concern if we enabled it here.
> The problem was that those calls where mixed into sdma_v4_0_gfx_resume() for the first SDMA instance.
>
> What was happening is that SDMA0 was initialized and while doing so enabled both SDMA0 and SDMA1. So SDMA1 was starting up before the ring buffer was even set.
>
> That this doesn't crashed was pure coincident and is most likely also the reason why we ran into problems when ring buffers weren't initialized.
>
> Regards,
> Christian.
>
>> Others, looks good for me. Christian, may we know which kind of jobs 
>> will use sdma page queue(ring), you know, we just sdma gfx queue(ring) before?
>>
>> Thanks,
>> Ray
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found]                       ` <CY4PR1201MB0245F26FFD7EE7558A7401B984E00-1iTaO6aE1DBfNQakwlCMTGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2018-10-10  7:24                         ` Ma, Sigil
       [not found]                           ` <CY4PR12MB1351B3D0E5E5A75BFAF7F0D487E00-rpdhrqHFk04aRV2spazHLQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Ma, Sigil @ 2018-10-10  7:24 UTC (permalink / raw)
  To: Liu, Monk, Koenig, Christian, Huang, Ray, Min, Frank
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi Monk, 

AUTO_CTXSW_ENABLE is not relevant to worldswitch preemption. It only applies to ring buffer preemption. SDMA will do the worldswitch whether AUTO_CTXSW_ENABLE is 1 or 0.

-----Original Message-----
From: Liu, Monk 
Sent: Wednesday, October 10, 2018 2:54 PM
To: Koenig, Christian <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV

Oh, that mean I remember it reversed way, according to code looks we need to enable ctx_switch to support WORLD SWITCH for SDMA engine

But better let Sigil confirm it  ...

Hi @Ma, Sigil can you confirm it ? what's the relationship between ctx_swich and world swich for SDMA engines ? 

Ctx_switch_enable() will set "SDMA0/1_CNTL's field: AUTO_CTXSW_ENABLE" to 1, can you tell us what's it for and how it go with SRIOV world switch ? 

Thanks 

/Monk

-----Original Message-----
From: Koenig, Christian
Sent: Tuesday, October 9, 2018 9:03 PM
To: Liu, Monk <Monk.Liu@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV

Hi Monk,

well that doesn't make much sense to me what you say here cause context switching certainly is already enabled under SRIOV:

> -               if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence 
> doesn't need below to lines */
> -                       sdma_v4_0_ctx_switch_enable(adev, true);
> -                       sdma_v4_0_enable(adev, true);
> -               }

The problem is that context switching as well as the gfx ring is enabled for both SDMA0 and SDMA1 without initializing SDMA1.

That's most likely causing some unwanted consequences.

Christian.

Am 09.10.2018 um 13:45 schrieb Liu, Monk:
> Context switch is for preemption across different queues (gfx, rlc0/1,
> page) under bare-metal environment, For SRIOV we didn't need it and we didn't test it yet, so we just disable it to make life easier, besides since each VF share only 6 MS slice there is in fact no benefit to enable it for SRIOV ...
>
> + @Ma, Sigil to confirm
>
> Hi Sigil
>
> Do you think context switch could be enabled for SRIOV VF ?? I worry that the context switch have internal crush with preemption for world switch , thanks !
>
> /Monk
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, October 9, 2018 6:57 PM
> To: Huang, Ray <Ray.Huang@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Min, 
> Frank <Frank.Min@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>
> Am 09.10.2018 um 11:17 schrieb Huang Rui:
>> On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
>>> [SNIP]
>>> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
>>> -		r = sdma_v4_0_load_microcode(adev);
>>> +	/* start the gfx rings and rlc compute queues */
>>> +	for (i = 0; i < adev->sdma.num_instances; i++)
>>> +		sdma_v4_0_gfx_resume(adev, i);
>>> +
>>> +	if (amdgpu_sriov_vf(adev)) {
>>> +		sdma_v4_0_ctx_switch_enable(adev, true);
>>> +		sdma_v4_0_enable(adev, true);
>>> +	} else {
>>> +		r = sdma_v4_0_rlc_resume(adev);
>>>    		if (r)
>>>    			return r;
>>>    	}
>> + Monk, Frank,
>>
>> I probably cannot judge here, under SRIOV, I saw you disable ctx 
>> switch before. Do you have any concern if we enabled it here.
> The problem was that those calls where mixed into sdma_v4_0_gfx_resume() for the first SDMA instance.
>
> What was happening is that SDMA0 was initialized and while doing so enabled both SDMA0 and SDMA1. So SDMA1 was starting up before the ring buffer was even set.
>
> That this doesn't crashed was pure coincident and is most likely also the reason why we ran into problems when ring buffers weren't initialized.
>
> Regards,
> Christian.
>
>> Others, looks good for me. Christian, may we know which kind of jobs 
>> will use sdma page queue(ring), you know, we just sdma gfx queue(ring) before?
>>
>> Thanks,
>> Ray
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found]                           ` <CY4PR12MB1351B3D0E5E5A75BFAF7F0D487E00-rpdhrqHFk04aRV2spazHLQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-10-10  7:52                             ` Liu, Monk
       [not found]                               ` <CY4PR1201MB024507BB222336DFA92304C784E00-1iTaO6aE1DBfNQakwlCMTGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Liu, Monk @ 2018-10-10  7:52 UTC (permalink / raw)
  To: Ma, Sigil, Koenig, Christian, Huang, Ray, Min, Frank
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Thanks Sigil

Hi Christian

Looks like we can enable/disable ctx-switch for SDMA at will; there is no dependency on, or conflict with, SRIOV 

/Monk

-----Original Message-----
From: Ma, Sigil 
Sent: Wednesday, October 10, 2018 3:25 PM
To: Liu, Monk <Monk.Liu@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV

Hi Monk, 

AUTO_CTXSW_ENABLE is not relevant to worldswitch preemption. it only applies for ring buffer preemption. SDMA will do worldswitch whatever AUTO_CTXSW_ENABLE is 1 or 0.

-----Original Message-----
From: Liu, Monk
Sent: Wednesday, October 10, 2018 2:54 PM
To: Koenig, Christian <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV

Oh, that mean I remember it reversed way, according to code looks we need to enable ctx_switch to support WORLD SWITCH for SDMA engine

But better let Sigil confirm it  ...

Hi @Ma, Sigil can you confirm it ? what's the relationship between ctx_swich and world swich for SDMA engines ? 

Ctx_switch_enable() will set "SDMA0/1_CNTL's field: AUTO_CTXSW_ENABLE" to 1, can you tell us what's it for and how it go with SRIOV world switch ? 

Thanks 

/Monk

-----Original Message-----
From: Koenig, Christian
Sent: Tuesday, October 9, 2018 9:03 PM
To: Liu, Monk <Monk.Liu@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
Cc: amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV

Hi Monk,

well that doesn't make much sense to me what you say here cause context switching certainly is already enabled under SRIOV:

> -               if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence 
> doesn't need below to lines */
> -                       sdma_v4_0_ctx_switch_enable(adev, true);
> -                       sdma_v4_0_enable(adev, true);
> -               }

The problem is that context switching as well as the gfx ring is enabled for both SDMA0 and SDMA1 without initializing SDMA1.

That's most likely causing some unwanted consequences.

Christian.

Am 09.10.2018 um 13:45 schrieb Liu, Monk:
> Context switch is for preemption across different queues (gfx, rlc0/1,
> page) under bare-metal environment, For SRIOV we didn't need it and we didn't test it yet, so we just disable it to make life easier, besides since each VF share only 6 MS slice there is in fact no benefit to enable it for SRIOV ...
>
> + @Ma, Sigil to confirm
>
> Hi Sigil
>
> Do you think context switch could be enabled for SRIOV VF ?? I worry that the context switch have internal crush with preemption for world switch , thanks !
>
> /Monk
>
> -----Original Message-----
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> Sent: Tuesday, October 9, 2018 6:57 PM
> To: Huang, Ray <Ray.Huang@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Min, 
> Frank <Frank.Min@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>
> Am 09.10.2018 um 11:17 schrieb Huang Rui:
>> On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
>>> [SNIP]
>>> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
>>> -		r = sdma_v4_0_load_microcode(adev);
>>> +	/* start the gfx rings and rlc compute queues */
>>> +	for (i = 0; i < adev->sdma.num_instances; i++)
>>> +		sdma_v4_0_gfx_resume(adev, i);
>>> +
>>> +	if (amdgpu_sriov_vf(adev)) {
>>> +		sdma_v4_0_ctx_switch_enable(adev, true);
>>> +		sdma_v4_0_enable(adev, true);
>>> +	} else {
>>> +		r = sdma_v4_0_rlc_resume(adev);
>>>    		if (r)
>>>    			return r;
>>>    	}
>> + Monk, Frank,
>>
>> I probably cannot judge here, under SRIOV, I saw you disable ctx 
>> switch before. Do you have any concern if we enabled it here.
> The problem was that those calls where mixed into sdma_v4_0_gfx_resume() for the first SDMA instance.
>
> What was happening is that SDMA0 was initialized and while doing so enabled both SDMA0 and SDMA1. So SDMA1 was starting up before the ring buffer was even set.
>
> That this doesn't crashed was pure coincident and is most likely also the reason why we ran into problems when ring buffers weren't initialized.
>
> Regards,
> Christian.
>
>> Others, looks good for me. Christian, may we know which kind of jobs 
>> will use sdma page queue(ring), you know, we just sdma gfx queue(ring) before?
>>
>> Thanks,
>> Ray
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found]                               ` <CY4PR1201MB024507BB222336DFA92304C784E00-1iTaO6aE1DBfNQakwlCMTGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2018-10-12 14:27                                 ` Koenig, Christian
       [not found]                                   ` <8d7c9d2e-6d4d-34d3-d8dc-102e253610f2-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Koenig, Christian @ 2018-10-12 14:27 UTC (permalink / raw)
  To: Liu, Monk, Ma, Sigil, Huang, Ray, Min, Frank
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Great, can I get an rb or acked-by for the patch in this case?

Thanks,
Christian.

Am 10.10.2018 um 09:52 schrieb Liu, Monk:
> Thanks Sigil
>
> Hi Christian
>
> Looks we can enable/disable ctx-switch for SDMA at will, no dependency or conflict on SRIOV
>
> /Monk
>
> -----Original Message-----
> From: Ma, Sigil
> Sent: Wednesday, October 10, 2018 3:25 PM
> To: Liu, Monk <Monk.Liu@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>
> Hi Monk,
>
> AUTO_CTXSW_ENABLE is not relevant to worldswitch preemption. it only applies for ring buffer preemption. SDMA will do worldswitch whatever AUTO_CTXSW_ENABLE is 1 or 0.
>
> -----Original Message-----
> From: Liu, Monk
> Sent: Wednesday, October 10, 2018 2:54 PM
> To: Koenig, Christian <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>
> Oh, that mean I remember it reversed way, according to code looks we need to enable ctx_switch to support WORLD SWITCH for SDMA engine
>
> But better let Sigil confirm it  ...
>
> Hi @Ma, Sigil can you confirm it ? what's the relationship between ctx_swich and world swich for SDMA engines ?
>
> Ctx_switch_enable() will set "SDMA0/1_CNTL's field: AUTO_CTXSW_ENABLE" to 1, can you tell us what's it for and how it go with SRIOV world switch ?
>
> Thanks
>
> /Monk
>
> -----Original Message-----
> From: Koenig, Christian
> Sent: Tuesday, October 9, 2018 9:03 PM
> To: Liu, Monk <Monk.Liu@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
> Cc: amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>
> Hi Monk,
>
> well that doesn't make much sense to me what you say here cause context switching certainly is already enabled under SRIOV:
>
>> -               if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence
>> doesn't need below to lines */
>> -                       sdma_v4_0_ctx_switch_enable(adev, true);
>> -                       sdma_v4_0_enable(adev, true);
>> -               }
> The problem is that context switching as well as the gfx ring is enabled for both SDMA0 and SDMA1 without initializing SDMA1.
>
> That's most likely causing some unwanted consequences.
>
> Christian.
>
> Am 09.10.2018 um 13:45 schrieb Liu, Monk:
>> Context switch is for preemption across different queues (gfx, rlc0/1,
>> page) under bare-metal environment, For SRIOV we didn't need it and we didn't test it yet, so we just disable it to make life easier, besides since each VF share only 6 MS slice there is in fact no benefit to enable it for SRIOV ...
>>
>> + @Ma, Sigil to confirm
>>
>> Hi Sigil
>>
>> Do you think context switch could be enabled for SRIOV VF ?? I worry that the context switch have internal crush with preemption for world switch , thanks !
>>
>> /Monk
>>
>> -----Original Message-----
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>> Sent: Tuesday, October 9, 2018 6:57 PM
>> To: Huang, Ray <Ray.Huang@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Min,
>> Frank <Frank.Min@amd.com>
>> Cc: amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>>
>> Am 09.10.2018 um 11:17 schrieb Huang Rui:
>>> On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
>>>> [SNIP]
>>>> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
>>>> -		r = sdma_v4_0_load_microcode(adev);
>>>> +	/* start the gfx rings and rlc compute queues */
>>>> +	for (i = 0; i < adev->sdma.num_instances; i++)
>>>> +		sdma_v4_0_gfx_resume(adev, i);
>>>> +
>>>> +	if (amdgpu_sriov_vf(adev)) {
>>>> +		sdma_v4_0_ctx_switch_enable(adev, true);
>>>> +		sdma_v4_0_enable(adev, true);
>>>> +	} else {
>>>> +		r = sdma_v4_0_rlc_resume(adev);
>>>>     		if (r)
>>>>     			return r;
>>>>     	}
>>> + Monk, Frank,
>>>
>>> I probably cannot judge here, under SRIOV, I saw you disable ctx
>>> switch before. Do you have any concern if we enabled it here.
>> The problem was that those calls where mixed into sdma_v4_0_gfx_resume() for the first SDMA instance.
>>
>> What was happening is that SDMA0 was initialized and while doing so enabled both SDMA0 and SDMA1. So SDMA1 was starting up before the ring buffer was even set.
>>
>> That this doesn't crashed was pure coincident and is most likely also the reason why we ran into problems when ring buffers weren't initialized.
>>
>> Regards,
>> Christian.
>>
>>> Others, looks good for me. Christian, may we know which kind of jobs
>>> will use sdma page queue(ring), you know, we just sdma gfx queue(ring) before?
>>>
>>> Thanks,
>>> Ray
>>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found]                                   ` <8d7c9d2e-6d4d-34d3-d8dc-102e253610f2-5C7GfCeVMHo@public.gmane.org>
@ 2018-10-16 12:34                                     ` Christian König
       [not found]                                       ` <2512cfee-a603-75c4-bf10-9ae0b4b8c5c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 25+ messages in thread
From: Christian König @ 2018-10-16 12:34 UTC (permalink / raw)
  To: Koenig, Christian, Liu, Monk, Ma, Sigil, Huang, Ray, Min, Frank
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Another gentle ping? Ray, Monk can anybody give me an rb for this patch?

It's the last one in this series and I want to get it done.

Christian.

Am 12.10.2018 um 16:27 schrieb Koenig, Christian:
> Great, can I get an rb or acked-by for the patch in this case?
>
> Thanks,
> Christian.
>
> Am 10.10.2018 um 09:52 schrieb Liu, Monk:
>> Thanks Sigil
>>
>> Hi Christian
>>
>> Looks we can enable/disable ctx-switch for SDMA at will, no dependency or conflict on SRIOV
>>
>> /Monk
>>
>> -----Original Message-----
>> From: Ma, Sigil
>> Sent: Wednesday, October 10, 2018 3:25 PM
>> To: Liu, Monk <Monk.Liu@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>
>> Cc: amd-gfx@lists.freedesktop.org
>> Subject: RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>>
>> Hi Monk,
>>
>> AUTO_CTXSW_ENABLE is not relevant to worldswitch preemption. it only applies for ring buffer preemption. SDMA will do worldswitch whatever AUTO_CTXSW_ENABLE is 1 or 0.
>>
>> -----Original Message-----
>> From: Liu, Monk
>> Sent: Wednesday, October 10, 2018 2:54 PM
>> To: Koenig, Christian <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
>> Cc: amd-gfx@lists.freedesktop.org
>> Subject: RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>>
>> Oh, that mean I remember it reversed way, according to code looks we need to enable ctx_switch to support WORLD SWITCH for SDMA engine
>>
>> But better let Sigil confirm it  ...
>>
>> Hi @Ma, Sigil can you confirm it ? what's the relationship between ctx_swich and world swich for SDMA engines ?
>>
>> Ctx_switch_enable() will set "SDMA0/1_CNTL's field: AUTO_CTXSW_ENABLE" to 1, can you tell us what's it for and how it go with SRIOV world switch ?
>>
>> Thanks
>>
>> /Monk
>>
>> -----Original Message-----
>> From: Koenig, Christian
>> Sent: Tuesday, October 9, 2018 9:03 PM
>> To: Liu, Monk <Monk.Liu@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
>> Cc: amd-gfx@lists.freedesktop.org
>> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>>
>> Hi Monk,
>>
>> well that doesn't make much sense to me what you say here cause context switching certainly is already enabled under SRIOV:
>>
>>> -               if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence
>>> doesn't need below to lines */
>>> -                       sdma_v4_0_ctx_switch_enable(adev, true);
>>> -                       sdma_v4_0_enable(adev, true);
>>> -               }
>> The problem is that context switching as well as the gfx ring is enabled for both SDMA0 and SDMA1 without initializing SDMA1.
>>
>> That's most likely causing some unwanted consequences.
>>
>> Christian.
>>
>> Am 09.10.2018 um 13:45 schrieb Liu, Monk:
>>> Context switch is for preemption across different queues (gfx, rlc0/1,
>>> page) under bare-metal environment, For SRIOV we didn't need it and we didn't test it yet, so we just disable it to make life easier, besides since each VF share only 6 MS slice there is in fact no benefit to enable it for SRIOV ...
>>>
>>> + @Ma, Sigil to confirm
>>>
>>> Hi Sigil
>>>
>>> Do you think context switch could be enabled for SRIOV VF ?? I worry that the context switch have internal crush with preemption for world switch , thanks !
>>>
>>> /Monk
>>>
>>> -----Original Message-----
>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>> Sent: Tuesday, October 9, 2018 6:57 PM
>>> To: Huang, Ray <Ray.Huang@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Min,
>>> Frank <Frank.Min@amd.com>
>>> Cc: amd-gfx@lists.freedesktop.org
>>> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
>>>
>>> Am 09.10.2018 um 11:17 schrieb Huang Rui:
>>>> On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
>>>>> [SNIP]
>>>>> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
>>>>> -		r = sdma_v4_0_load_microcode(adev);
>>>>> +	/* start the gfx rings and rlc compute queues */
>>>>> +	for (i = 0; i < adev->sdma.num_instances; i++)
>>>>> +		sdma_v4_0_gfx_resume(adev, i);
>>>>> +
>>>>> +	if (amdgpu_sriov_vf(adev)) {
>>>>> +		sdma_v4_0_ctx_switch_enable(adev, true);
>>>>> +		sdma_v4_0_enable(adev, true);
>>>>> +	} else {
>>>>> +		r = sdma_v4_0_rlc_resume(adev);
>>>>>      		if (r)
>>>>>      			return r;
>>>>>      	}
>>>> + Monk, Frank,
>>>>
>>>> I probably cannot judge here, under SRIOV, I saw you disable ctx
>>>> switch before. Do you have any concern if we enabled it here.
>>> The problem was that those calls where mixed into sdma_v4_0_gfx_resume() for the first SDMA instance.
>>>
>>> What was happening is that SDMA0 was initialized and while doing so enabled both SDMA0 and SDMA1. So SDMA1 was starting up before the ring buffer was even set.
>>>
>>> That this doesn't crashed was pure coincident and is most likely also the reason why we ran into problems when ring buffers weren't initialized.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> Others, looks good for me. Christian, may we know which kind of jobs
>>>> will use sdma page queue(ring), you know, we just sdma gfx queue(ring) before?
>>>>
>>>> Thanks,
>>>> Ray
>>>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
       [not found]                                       ` <2512cfee-a603-75c4-bf10-9ae0b4b8c5c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-10-16 14:42                                         ` Huang Rui
  0 siblings, 0 replies; 25+ messages in thread
From: Huang Rui @ 2018-10-16 14:42 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Min, Frank, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Liu, Monk,
	Ma, Sigil

On Tue, Oct 16, 2018 at 08:34:37PM +0800, Christian König wrote:
> Another gentle ping? Ray, Monk can anybody give me an rb for this patch?
> 
> It's the last one in this series and I want to get it done.
> 

Feel free to add

Reviewed-by: Huang Rui <ray.huang@amd.com>

> Christian.
> 
> Am 12.10.2018 um 16:27 schrieb Koenig, Christian:
> > Great, can I get an rb or acked-by for the patch in this case?
> >
> > Thanks,
> > Christian.
> >
> > Am 10.10.2018 um 09:52 schrieb Liu, Monk:
> >> Thanks Sigil
> >>
> >> Hi Christian
> >>
> >> Looks we can enable/disable ctx-switch for SDMA at will, no dependency or conflict on SRIOV
> >>
> >> /Monk
> >>
> >> -----Original Message-----
> >> From: Ma, Sigil
> >> Sent: Wednesday, October 10, 2018 3:25 PM
> >> To: Liu, Monk <Monk.Liu@amd.com>; Koenig, Christian <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>
> >> Cc: amd-gfx@lists.freedesktop.org
> >> Subject: RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
> >>
> >> Hi Monk,
> >>
> >> AUTO_CTXSW_ENABLE is not relevant to worldswitch preemption. it only applies for ring buffer preemption. SDMA will do worldswitch whatever AUTO_CTXSW_ENABLE is 1 or 0.
> >>
> >> -----Original Message-----
> >> From: Liu, Monk
> >> Sent: Wednesday, October 10, 2018 2:54 PM
> >> To: Koenig, Christian <Christian.Koenig@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
> >> Cc: amd-gfx@lists.freedesktop.org
> >> Subject: RE: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
> >>
> >> Oh, that means I remembered it the reversed way; according to the code, it looks like we need to enable ctx_switch to support WORLD SWITCH for the SDMA engine
> >>
> >> But better let Sigil confirm it  ...
> >>
> >> Hi @Ma, Sigil can you confirm it ? what's the relationship between ctx_swich and world swich for SDMA engines ?
> >>
> >> Ctx_switch_enable() will set "SDMA0/1_CNTL's field: AUTO_CTXSW_ENABLE" to 1, can you tell us what's it for and how it go with SRIOV world switch ?
> >>
> >> Thanks
> >>
> >> /Monk
> >>
> >> -----Original Message-----
> >> From: Koenig, Christian
> >> Sent: Tuesday, October 9, 2018 9:03 PM
> >> To: Liu, Monk <Monk.Liu@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Min, Frank <Frank.Min@amd.com>; Ma, Sigil <Sigil.Ma@amd.com>
> >> Cc: amd-gfx@lists.freedesktop.org
> >> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
> >>
> >> Hi Monk,
> >>
> >> well, what you say here doesn't make much sense to me because context switching certainly is already enabled under SRIOV:
> >>
> >>> -               if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence
> >>> doesn't need below to lines */
> >>> -                       sdma_v4_0_ctx_switch_enable(adev, true);
> >>> -                       sdma_v4_0_enable(adev, true);
> >>> -               }
> >> The problem is that context switching as well as the gfx ring is enabled for both SDMA0 and SDMA1 without initializing SDMA1.
> >>
> >> That's most likely causing some unwanted consequences.
> >>
> >> Christian.
> >>
> >> Am 09.10.2018 um 13:45 schrieb Liu, Monk:
> >>> Context switch is for preemption across different queues (gfx, rlc0/1,
> >>> page) under a bare-metal environment. For SRIOV we didn't need it and haven't tested it yet, so we just disable it to make life easier; besides, since each VF shares only a 6 ms slice, there is in fact no benefit to enabling it for SRIOV ...
> >>>
> >>> + @Ma, Sigil to confirm
> >>>
> >>> Hi Sigil
> >>>
> >>> Do you think context switch could be enabled for an SRIOV VF? I worry that the context switch has an internal clash with preemption for world switch, thanks!
> >>>
> >>> /Monk
> >>>
> >>> -----Original Message-----
> >>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
> >>> Sent: Tuesday, October 9, 2018 6:57 PM
> >>> To: Huang, Ray <Ray.Huang@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Min,
> >>> Frank <Frank.Min@amd.com>
> >>> Cc: amd-gfx@lists.freedesktop.org
> >>> Subject: Re: [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV
> >>>
> >>> Am 09.10.2018 um 11:17 schrieb Huang Rui:
> >>>> On Mon, Oct 08, 2018 at 03:35:15PM +0200, Christian König wrote:
> >>>>> [SNIP]
> >>>>> -	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
> >>>>> -		r = sdma_v4_0_load_microcode(adev);
> >>>>> +	/* start the gfx rings and rlc compute queues */
> >>>>> +	for (i = 0; i < adev->sdma.num_instances; i++)
> >>>>> +		sdma_v4_0_gfx_resume(adev, i);
> >>>>> +
> >>>>> +	if (amdgpu_sriov_vf(adev)) {
> >>>>> +		sdma_v4_0_ctx_switch_enable(adev, true);
> >>>>> +		sdma_v4_0_enable(adev, true);
> >>>>> +	} else {
> >>>>> +		r = sdma_v4_0_rlc_resume(adev);
> >>>>>      		if (r)
> >>>>>      			return r;
> >>>>>      	}
> >>>> + Monk, Frank,
> >>>>
> >>>> I probably cannot judge here; under SRIOV, I saw you disable ctx
> >>>> switch before. Do you have any concerns if we enable it here?
> >>> The problem was that those calls were mixed into sdma_v4_0_gfx_resume() for the first SDMA instance.
> >>>
> >>> What was happening is that SDMA0 was initialized and while doing so enabled both SDMA0 and SDMA1. So SDMA1 was starting up before the ring buffer was even set.
> >>>
> >>> That this didn't crash was pure coincidence and is most likely also the reason why we ran into problems when ring buffers weren't initialized.
> >>>
> >>> Regards,
> >>> Christian.
> >>>
> >>>> Otherwise, this looks good to me. Christian, may we know which kinds of jobs
> >>>> will use the sdma page queue(ring)? You know, we just had the sdma gfx queue(ring) before.
> >>>>
> >>>> Thanks,
> >>>> Ray
> >>>>
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2018-10-16 14:42 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-10-08 13:35 [PATCH 1/8] drm/amdgpu: fix incorrect use of amdgpu_irq_add_id in si_dma.c Christian König
     [not found] ` <20181008133521.3237-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-10-08 13:35   ` [PATCH 2/8] drm/amdgpu: fix sdma v4 startup under SRIOV Christian König
     [not found]     ` <20181008133521.3237-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-10-09  9:17       ` Huang Rui
2018-10-09 10:56         ` Christian König
     [not found]           ` <5ae6a2fe-80d6-858e-dcd2-2d44ab0b76ce-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-10-09 11:45             ` Liu, Monk
     [not found]               ` <CY4PR1201MB024521C2A0EA4BAE7272EA6584E70-1iTaO6aE1DBfNQakwlCMTGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2018-10-09 13:03                 ` Koenig, Christian
     [not found]                   ` <dbab4a65-d9ec-8ac7-75bb-86033de043f5-5C7GfCeVMHo@public.gmane.org>
2018-10-10  6:53                     ` Liu, Monk
     [not found]                       ` <CY4PR1201MB0245F26FFD7EE7558A7401B984E00-1iTaO6aE1DBfNQakwlCMTGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2018-10-10  7:24                         ` Ma, Sigil
     [not found]                           ` <CY4PR12MB1351B3D0E5E5A75BFAF7F0D487E00-rpdhrqHFk04aRV2spazHLQdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-10-10  7:52                             ` Liu, Monk
     [not found]                               ` <CY4PR1201MB024507BB222336DFA92304C784E00-1iTaO6aE1DBfNQakwlCMTGrFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2018-10-12 14:27                                 ` Koenig, Christian
     [not found]                                   ` <8d7c9d2e-6d4d-34d3-d8dc-102e253610f2-5C7GfCeVMHo@public.gmane.org>
2018-10-16 12:34                                     ` Christian König
     [not found]                                       ` <2512cfee-a603-75c4-bf10-9ae0b4b8c5c7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-10-16 14:42                                         ` Huang Rui
2018-10-08 13:35   ` [PATCH 3/8] drm/amdgpu: add basics for SDMA page queue support Christian König
     [not found]     ` <20181008133521.3237-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-10-09  9:31       ` Huang Rui
2018-10-08 13:35   ` [PATCH 4/8] drm/amdgpu: remove non gfx specific handling from sdma_v4_0_gfx_resume Christian König
     [not found]     ` <20181008133521.3237-4-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-10-09  9:34       ` Huang Rui
2018-10-08 13:35   ` [PATCH 5/8] drm/amdgpu: remove SRIOV " Christian König
     [not found]     ` <20181008133521.3237-5-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-10-09  9:35       ` Huang Rui
2018-10-08 13:35   ` [PATCH 6/8] drm/amdgpu: add some [WR]REG32_SDMA macros to sdma_v4_0.c Christian König
     [not found]     ` <20181008133521.3237-6-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-10-09  9:36       ` Huang Rui
2018-10-08 13:35   ` [PATCH 7/8] drm/amdgpu: activate paging queue on SDMA v4 Christian König
     [not found]     ` <20181008133521.3237-7-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-10-09  9:40       ` Huang Rui
2018-10-08 13:35   ` [PATCH 8/8] drm/amdgpu: use paging queue for VM page table updates Christian König
     [not found]     ` <20181008133521.3237-8-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-10-09  9:43       ` Huang Rui
2018-10-09  8:37   ` [PATCH 1/8] drm/amdgpu: fix incorrect use of amdgpu_irq_add_id in si_dma.c Huang Rui

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.