All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/13] *** VEGA10 SRIOV PATCHES ***
@ 2017-03-24 10:38 Monk Liu
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

patch 1 is a fix for VI
patch 2 to patch 11 are bug fixings for vega10 for SRIOV
patch 12/13 is the DMAframe scheme change to fix CE VM fault after world switch.


Monk Liu (13):
  drm/amdgpu:imple cond_exec for gfx8
  drm/amdgpu:enable mcbp for gfx9
  drm/amdgpu:add KIQ interrupt id
  drm/amdgpu:virt_init_setting invoke is missed!
  drm/amdgpu:fix ring init sequence
  drm/amdgpu:change sequence of SDMA v4 init
  drm/amdgpu:two fixings for sdma v4 for SRIOV
  drm/amdgpu:no cg for soc15 of SRIOV
  drm/amdgpu:fix gmc_v9 vm fault process for SRIOV
  drm/amdgpu:fix ring_write_multiple
  drm/amdgpu:fix missing programing critical registers
  drm/amdgpu:changes in gfx DMAframe scheme
  drm/amdgpu:change emit_frame_size of gfx8

 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  6 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 77 +++++++++++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    | 57 ++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 37 ++++++++++-----
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c |  9 ++++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 28 ++++++++----
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  |  9 ++++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 43 +++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/soc15.c       |  7 +++
 12 files changed, 206 insertions(+), 81 deletions(-)

-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* [PATCH 01/13] drm/amdgpu:imple cond_exec for gfx8
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 02/13] drm/amdgpu:enable mcbp for gfx9 Monk Liu
                     ` (12 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

when MCBP enalbed for gfx8, the cond_exec must also
be implemented, otherwise there will be odds to meet
cross engine (ce and me) deadlock when WORLD switch happens.

Change-Id: I6bdb5f91dc6e1b56dcad43741a109a6eb08cb5fa
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5757300..396c075 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6499,6 +6499,32 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
 }
 
+static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+{
+	unsigned ret;
+	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
+	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+	ret = ring->wptr & ring->buf_mask;
+	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+	return ret;
+}
+
+static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
+{
+	unsigned cur;
+	BUG_ON(offset > ring->buf_mask);
+	BUG_ON(ring->ring[offset] != 0x55aa55aa);
+
+	cur = (ring->wptr & ring->buf_mask) - 1;
+	if (likely(cur > offset))
+		ring->ring[offset] = cur - offset;
+	else
+		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
+}
+
+
 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -6788,6 +6814,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
+	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
+	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 02/13] drm/amdgpu:enable mcbp for gfx9
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 01/13] drm/amdgpu:imple cond_exec for gfx8 Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 03/13] drm/amdgpu:add KIQ interrupt id Monk Liu
                     ` (11 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

set bit 21 of IB.control filed to actually enable
MCBP for SRIOV

Change-Id: Ie5126d5be95e037087cf7167c28c61975f40d784
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ad82ab7..0d8fb51 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3016,6 +3016,9 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
         control |= ib->length_dw | (vm_id << 24);
 
+		if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT))
+			control |= (1 << 21);
+
         amdgpu_ring_write(ring, header);
 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
         amdgpu_ring_write(ring,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 03/13] drm/amdgpu:add KIQ interrupt id
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 01/13] drm/amdgpu:imple cond_exec for gfx8 Monk Liu
  2017-03-24 10:38   ` [PATCH 02/13] drm/amdgpu:enable mcbp for gfx9 Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-4-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 04/13] drm/amdgpu:virt_init_setting invoke is missed! Monk Liu
                     ` (10 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

178 id is for GENERIC2 interrupt which is used by KIQ

Change-Id: I9aa9a90a97b33aec8ca8207d1814eaecdc92667d
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0d8fb51..ce6fa03 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1023,6 +1023,11 @@ static int gfx_v9_0_sw_init(void *handle)
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	/* KIQ event */
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
+	if (r)
+		return r;
+
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
 	if (r)
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 04/13] drm/amdgpu:virt_init_setting invoke is missed!
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 03/13] drm/amdgpu:add KIQ interrupt id Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-5-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 05/13] drm/amdgpu:fix ring init sequence Monk Liu
                     ` (9 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

this must be invoked during early init

Change-Id: I68726dd36825259913b47493ba1e9c467b368d0c
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 7e54d9dc..4ebe94b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -553,6 +553,10 @@ static int soc15_common_early_init(void *handle)
 		(amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP)))
 		psp_enabled = true;
 
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_init_setting(adev);
+	}
+
 	/*
 	 * nbio need be used for both sdma and gfx9, but only
 	 * initializes once
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 05/13] drm/amdgpu:fix ring init sequence
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 04/13] drm/amdgpu:virt_init_setting invoke is missed! Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-6-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 06/13] drm/amdgpu:change sequence of SDMA v4 init Monk Liu
                     ` (8 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

ring->buf_mask need be set prior to ring_clear_ring invoke
and fix ring_clear_ring as well which should use buf_mask
instead of ptr_mask

Change-Id: I7778a7afe27ac2bdedcaba1b0146582100602f9d
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index e619833..10e94d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -235,6 +235,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	ring->ring_size = roundup_pow_of_two(max_dw * 4 *
 					     amdgpu_sched_hw_submission);
 
+	ring->buf_mask = (ring->ring_size / 4) - 1;
+	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
+		0xffffffffffffffff : ring->buf_mask;
 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
 		r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
@@ -248,9 +251,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		}
 		amdgpu_ring_clear_ring(ring);
 	}
-	ring->buf_mask = (ring->ring_size / 4) - 1;
-	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
-		0xffffffffffffffff : ring->buf_mask;
 
 	ring->max_dw = max_dw;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 9f57eda..5fd3dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -194,7 +194,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring);
 static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 {
 	int i = 0;
-	while (i <= ring->ptr_mask)
+	while (i <= ring->buf_mask)
 		ring->ring[i++] = ring->funcs->nop;
 
 }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 06/13] drm/amdgpu:change sequence of SDMA v4 init
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 05/13] drm/amdgpu:fix ring init sequence Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-7-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 07/13] drm/amdgpu:two fixings for sdma v4 for SRIOV Monk Liu
                     ` (7 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

must set minor_update.enable before write smaller value
to wptr/doorbell, so for sriov we need set that register
bit in hw_init period.

this could fix the SDMA ring test fail after guest reboot

Change-Id: Id863396788cc5b35550cdcac405131d41690e77a
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 37 +++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index ee3b4a9..4d9fec8 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -560,8 +560,14 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
 
 		ring->wptr = 0;
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+
+		/* before programing wptr to a less value, need set minor_ptr_update first */
+		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+
+		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+		}
 
 		doorbell = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL));
 		doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET));
@@ -577,15 +583,23 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
 		nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index);
 
+		if (amdgpu_sriov_vf(adev))
+			sdma_v4_0_ring_set_wptr(ring);
+
+		/* set minor_ptr_update to 0 after wptr programed */
+		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
+
 		/* set utc l1 enable flag always to 1 */
 		temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL));
 		temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), temp);
 
-		/* unhalt engine */
-		temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL));
-		temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
-		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp);
+		if (!amdgpu_sriov_vf(adev)) {
+			/* unhalt engine */
+			temp = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL));
+			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_F32_CNTL), temp);
+		}
 
 		/* enable DMA RB */
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
@@ -601,6 +615,11 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
 
 		ring->ready = true;
 
+		if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+			sdma_v4_0_ctx_switch_enable(adev, true);
+			sdma_v4_0_enable(adev, true);
+		}
+
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -671,8 +690,6 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
 			(adev->sdma.instance[i].fw->data +
 				le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 
-		sdma_v4_0_print_ucode_regs(adev);
-
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), 0);
 
 
@@ -699,10 +716,10 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
  */
 static int sdma_v4_0_start(struct amdgpu_device *adev)
 {
-	int r;
+	int r = 0;
 
 	if (amdgpu_sriov_vf(adev)) {
-		/* disable RB and halt engine */
+		sdma_v4_0_ctx_switch_enable(adev, false);
 		sdma_v4_0_enable(adev, false);
 
 		/* set RB registers */
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 07/13] drm/amdgpu:two fixings for sdma v4 for SRIOV
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (5 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 06/13] drm/amdgpu:change sequence of SDMA v4 init Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-8-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 08/13] drm/amdgpu:no cg for soc15 of SRIOV Monk Liu
                     ` (6 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

no hw_fini for SRIOV, otherwise other VF will be affected
no CG for SRIOV

Change-Id: I1b0525eb8d08754b4bd1a6ee6798bf5e41c6bc6b
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4d9fec8..443f850 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1172,6 +1172,9 @@ static int sdma_v4_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	sdma_v4_0_ctx_switch_enable(adev, false);
 	sdma_v4_0_enable(adev, false);
 
@@ -1406,6 +1409,9 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		sdma_v4_0_update_medium_grain_clock_gating(adev,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 08/13] drm/amdgpu:no cg for soc15 of SRIOV
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (6 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 07/13] drm/amdgpu:two fixings for sdma v4 for SRIOV Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-9-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 09/13] drm/amdgpu:fix gmc_v9 vm fault process for SRIOV Monk Liu
                     ` (5 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

no CG for SRIOV on SOC15

Change-Id: Ic17e99862a875de9bfc811c72d0ab627ba58d585
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 4ebe94b..509cd0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -781,6 +781,9 @@ static int soc15_common_set_clockgating_state(void *handle,
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		nbio_v6_1_update_medium_grain_clock_gating(adev,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 09/13] drm/amdgpu:fix gmc_v9 vm fault process for SRIOV
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (7 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 08/13] drm/amdgpu:no cg for soc15 of SRIOV Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-10-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 10/13] drm/amdgpu:fix ring_write_multiple Monk Liu
                     ` (4 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

for SRIOV we cannot use access register when in IRQ routine
with regular KIQ method

Change-Id: Ifae3164cf12311b851ae131f58175f6ec3174f82
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 51a1919..88221bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -138,20 +138,28 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 	addr = (u64)entry->src_data[0] << 12;
 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
 
-	if (entry->vm_id_src) {
-		status = RREG32(mmhub->vm_l2_pro_fault_status);
-		WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
-	} else {
-		status = RREG32(gfxhub->vm_l2_pro_fault_status);
-		WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
-	}
+	if (!amdgpu_sriov_vf(adev)) {
+		if (entry->vm_id_src) {
+			status = RREG32(mmhub->vm_l2_pro_fault_status);
+			WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
+		} else {
+			status = RREG32(gfxhub->vm_l2_pro_fault_status);
+			WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
+		}
 
-	DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) "
+		DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) "
 		  "at page 0x%016llx from %d\n"
 		  "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
 		  entry->vm_id_src ? "mmhub" : "gfxhub",
 		  entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
 		  addr, entry->client_id, status);
+	} else {
+		DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) "
+		  "at page 0x%016llx from %d\n",
+		  entry->vm_id_src ? "mmhub" : "gfxhub",
+		  entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
+		  addr, entry->client_id);
+	}
 
 	return 0;
 }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 10/13] drm/amdgpu:fix ring_write_multiple
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (8 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 09/13] drm/amdgpu:fix gmc_v9 vm fault process for SRIOV Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-11-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 11/13] drm/amdgpu:fix missing programing critical registers Monk Liu
                     ` (3 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

ring_write_multiple should use buf_mask instead of ptr_mask

Change-Id: Ia249b6a1a990a6c3cba5c4048de6d604bb91d0ef
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2861aee..284af3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1804,9 +1804,9 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
 	if (ring->count_dw < count_dw) {
 		DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n");
 	} else {
-		occupied = ring->wptr & ring->ptr_mask;
+		occupied = ring->wptr & ring->buf_mask;
 		dst = (void *)&ring->ring[occupied];
-		chunk1 = ring->ptr_mask + 1 - occupied;
+		chunk1 = ring->buf_mask + 1 - occupied;
 		chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
 		chunk2 = count_dw - chunk1;
 		chunk1 <<= 2;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 11/13] drm/amdgpu:fix missing programing critical registers
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (9 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 10/13] drm/amdgpu:fix ring_write_multiple Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-12-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme Monk Liu
                     ` (2 subsequent siblings)
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

those MC_VM registers won't be programed by VBIOS in VF
so driver is responsible to programe them.

Change-Id: I817371346d86bd5668ac80a486dadc1605d0b6ca
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 9 +++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 4 +++-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 9 +++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 1ff019c..1d3c34d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -53,6 +53,15 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
 				mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
 				(u32)(value >> 44));
 
+	if (amdgpu_sriov_vf(adev)) {
+		/* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so
+		vbios post doesn't program them, for SRIOV driver need to program them */
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_BASE),
+				adev->mc.vram_start >> 24);
+		WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_TOP),
+				adev->mc.vram_end >> 24);
+	}
+
 	/* Disable AGP. */
 	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0);
 	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 88221bb..d841bc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -383,7 +383,9 @@ static int gmc_v9_0_late_init(void *handle)
 static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
 					struct amdgpu_mc *mc)
 {
-	u64 base = mmhub_v1_0_get_fb_location(adev);
+	u64 base = 0;
+	if (!amdgpu_sriov_vf(adev))
+		base = mmhub_v1_0_get_fb_location(adev);
 	amdgpu_vram_location(adev, &adev->mc, base);
 	adev->mc.gtt_base_align = 0;
 	amdgpu_gtt_location(adev, mc);
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index b1e0e6b..12025d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -67,6 +67,15 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
 				mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
 				(u32)(value >> 44));
 
+	if (amdgpu_sriov_vf(adev)) {
+		/* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so
+		vbios post doesn't program them, for SRIOV driver need to program them */
+		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE),
+			adev->mc.vram_start >> 24);
+		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP),
+			adev->mc.vram_end >> 24);
+	}
+
 	/* Disable AGP. */
 	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BASE), 0);
 	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP), 0);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (10 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 11/13] drm/amdgpu:fix missing programing critical registers Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
       [not found]     ` <1490351912-10122-13-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 10:38   ` [PATCH 13/13] drm/amdgpu:change emit_frame_size of gfx8 Monk Liu
  2017-03-24 11:39   ` [PATCH 00/13] *** VEGA10 SRIOV PATCHES *** Yu, Xiangliang
  13 siblings, 1 reply; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

1) Adapt to vulkan:
Now use double SWITCH BUFFER to replace the 128 nops w/a,
because when vulkan introduced, umd can insert 7 ~ 16 IBs
per submit which makes 256 DW size cannot hold the whole
DMAframe (if we still insert those 128 nops), CP team suggests
use double SWITCH_BUFFERs, instead of tricky 128 NOPs w/a.

2) To fix the CE VM fault issue when MCBP introduced:
Need one more COND_EXEC wrapping IB part (original one us
for VM switch part).

this change can fix vm fault issue caused by below scenario
without this change:

>CE passed original COND_EXEC (no MCBP issued this moment),
 proceed as normal.

>DE catch up to this COND_EXEC, but this time MCBP issued,
 thus DE treats all following packages as NOP. The following
 VM switch packages now looks just as NOP to DE, so DE
 dosen't do VM flush at all.

>Now CE proceeds to the first IBc, and triggers VM fault,
 because DE didn't do VM flush for this DMAframe.

3) change estimated alloc size for gfx9.
with new DMAframe scheme, we need modify emit_frame_size
for gfx9

with above changes, no more 128 NOPs w/a after VM flush

Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 +++++++++++++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 29 ++++++++-----
 3 files changed, 69 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index d103270..b300929 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		return r;
 	}
 
-	if (ring->funcs->init_cond_exec)
-		patch_offset = amdgpu_ring_init_cond_exec(ring);
-
 	if (vm) {
 		amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast than DE */
 
@@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		}
 	}
 
-	if (ring->funcs->emit_hdp_flush
+	if (ring->funcs->init_cond_exec)
+		patch_offset = amdgpu_ring_init_cond_exec(ring);
+
+		if (ring->funcs->emit_hdp_flush
 #ifdef CONFIG_X86_64
 	    && !(adev->flags & AMD_IS_APU)
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9ff445c..74be4fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 		id->oa_size != job->oa_size);
 	int r;
 
-	if (ring->funcs->emit_pipeline_sync && (
-	    job->vm_needs_flush || gds_switch_needed ||
-	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
-		amdgpu_ring_emit_pipeline_sync(ring);
+	if (job->vm_needs_flush || gds_switch_needed ||
+		amdgpu_vm_is_gpu_reset(adev, id) ||
+		amdgpu_vm_ring_has_compute_vm_bug(ring)) {
+		unsigned patch_offset = 0;
 
-	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
-	    amdgpu_vm_is_gpu_reset(adev, id))) {
-		struct fence *fence;
-		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
+		if (ring->funcs->init_cond_exec)
+			patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-		trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
-		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
+		if (ring->funcs->emit_pipeline_sync &&
+			(job->vm_needs_flush || gds_switch_needed ||
+			amdgpu_vm_ring_has_compute_vm_bug(ring)))
+			amdgpu_ring_emit_pipeline_sync(ring);
 
-		r = amdgpu_fence_emit(ring, &fence);
-		if (r)
-			return r;
+		if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
+			amdgpu_vm_is_gpu_reset(adev, id))) {
+			struct fence *fence;
+			u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
 
-		mutex_lock(&adev->vm_manager.lock);
-		fence_put(id->last_flush);
-		id->last_flush = fence;
-		mutex_unlock(&adev->vm_manager.lock);
-	}
+			trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
+			amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
 
-	if (gds_switch_needed) {
-		id->gds_base = job->gds_base;
-		id->gds_size = job->gds_size;
-		id->gws_base = job->gws_base;
-		id->gws_size = job->gws_size;
-		id->oa_base = job->oa_base;
-		id->oa_size = job->oa_size;
-		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
-					    job->gds_base, job->gds_size,
-					    job->gws_base, job->gws_size,
-					    job->oa_base, job->oa_size);
-	}
+			r = amdgpu_fence_emit(ring, &fence);
+			if (r)
+				return r;
 
+			mutex_lock(&adev->vm_manager.lock);
+			fence_put(id->last_flush);
+			id->last_flush = fence;
+			mutex_unlock(&adev->vm_manager.lock);
+		}
+
+		if (gds_switch_needed) {
+			id->gds_base = job->gds_base;
+			id->gds_size = job->gds_size;
+			id->gws_base = job->gws_base;
+			id->gws_size = job->gws_size;
+			id->oa_base = job->oa_base;
+			id->oa_size = job->oa_size;
+			amdgpu_ring_emit_gds_switch(ring, job->vm_id,
+							job->gds_base, job->gds_size,
+							job->gws_base, job->gws_size,
+							job->oa_base, job->oa_size);
+		}
+
+		if (ring->funcs->patch_cond_exec)
+			amdgpu_ring_patch_cond_exec(ring, patch_offset);
+
+		/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
+		if (ring->funcs->emit_switch_buffer) {
+			amdgpu_ring_emit_switch_buffer(ring);
+			amdgpu_ring_emit_switch_buffer(ring);
+		}
+	}
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index ce6fa03..eb8551b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, 0x0);
-		/* Emits 128 dw nop to prevent CE access VM before vm_flush finish */
-		amdgpu_ring_insert_nop(ring, 128);
 	}
 }
 
@@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
-	.emit_frame_size =
-		20 + /* gfx_v9_0_ring_emit_gds_switch */
-		7 + /* gfx_v9_0_ring_emit_hdp_flush */
-		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
-		8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
-		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
-		128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
-		2 + /* gfx_v9_ring_emit_sb */
-		3, /* gfx_v9_ring_emit_cntxcntl */
+	.emit_frame_size = /* totally 242 maximum if 16 IBs */
+		5 +  /* COND_EXEC */
+		7 +  /* PIPELINE_SYNC */
+		46 + /* VM_FLUSH */
+		8 +  /* FENCE for VM_FLUSH */
+		20 + /* GDS switch */
+		4 + /* double SWITCH_BUFFER,
+		       the first COND_EXEC jump to the place just
+			   prior to this double SWITCH_BUFFER  */
+		5 + /* COND_EXEC */
+		7 +	 /*	HDP_flush */
+		4 +	 /*	VGT_flush */
+		14 + /*	CE_META */
+		31 + /*	DE_META */
+		3 + /* CNTX_CTRL */
+		5 + /* HDP_INVL */
+		8 + 8 + /* FENCE x2 */
+		2, /* SWITCH_BUFFER */
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v9_0_ring_emit_fence,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* [PATCH 13/13] drm/amdgpu:change emit_frame_size of gfx8
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (11 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme Monk Liu
@ 2017-03-24 10:38   ` Monk Liu
  2017-03-24 11:39   ` [PATCH 00/13] *** VEGA10 SRIOV PATCHES *** Yu, Xiangliang
  13 siblings, 0 replies; 34+ messages in thread
From: Monk Liu @ 2017-03-24 10:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

and no need to insert 128 nops after gfx8 vm flush anymore
because there was double SWITCH_BUFFER append to vm flush

Change-Id: I6ecec95236bd1745f2beaa1b34a075748813f131
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 396c075..3016e535 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6390,8 +6390,6 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, 0x0);
-		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
-		amdgpu_ring_insert_nop(ring, 128);
 	}
 }
 
@@ -6791,15 +6789,24 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.get_rptr = gfx_v8_0_ring_get_rptr,
 	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
 	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
-	.emit_frame_size =
-		20 + /* gfx_v8_0_ring_emit_gds_switch */
-		7 + /* gfx_v8_0_ring_emit_hdp_flush */
-		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
-		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
-		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
-		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
-		2 + /* gfx_v8_ring_emit_sb */
-		3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
+	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
+		5 +  /* COND_EXEC */
+		7 +  /* PIPELINE_SYNC */
+		19 + /* VM_FLUSH */
+		8 +  /* FENCE for VM_FLUSH */
+		20 + /* GDS switch */
+		4 + /* double SWITCH_BUFFER,
+		       the first COND_EXEC jump to the place just
+			   prior to this double SWITCH_BUFFER  */
+		5 + /* COND_EXEC */
+		7 +	 /*	HDP_flush */
+		4 +	 /*	VGT_flush */
+		14 + /*	CE_META */
+		31 + /*	DE_META */
+		3 + /* CNTX_CTRL */
+		5 + /* HDP_INVL */
+		8 + 8 + /* FENCE x2 */
+		2, /* SWITCH_BUFFER */
 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
 	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
 	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 34+ messages in thread

* RE: [PATCH 00/13] *** VEGA10 SRIOV PATCHES ***
       [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
                     ` (12 preceding siblings ...)
  2017-03-24 10:38   ` [PATCH 13/13] drm/amdgpu:change emit_frame_size of gfx8 Monk Liu
@ 2017-03-24 11:39   ` Yu, Xiangliang
  13 siblings, 0 replies; 34+ messages in thread
From: Yu, Xiangliang @ 2017-03-24 11:39 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

Reviewed-by: Xiangliang Yu <Xiangliang.Yu@amd.com> for the series.


Thanks!
Xiangliang Yu


> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk <Monk.Liu@amd.com>
> Subject: [PATCH 00/13] *** VEGA10 SRIOV PATCHES ***
> 
> patch 1 is a fix for VI
> patch 2 to patch 11 are bug fixings for vega10 for SRIOV patch 12/13 is the
> DMAframe scheme change to fix CE VM fault after world switch.
> 
> 
> Monk Liu (13):
>   drm/amdgpu:imple cond_exec for gfx8
>   drm/amdgpu:enable mcbp for gfx9
>   drm/amdgpu:add KIQ interrupt id
>   drm/amdgpu:virt_init_setting invoke is missed!
>   drm/amdgpu:fix ring init sequence
>   drm/amdgpu:change sequence of SDMA v4 init
>   drm/amdgpu:two fixings for sdma v4 for SRIOV
>   drm/amdgpu:no cg for soc15 of SRIOV
>   drm/amdgpu:fix gmc_v9 vm fault process for SRIOV
>   drm/amdgpu:fix ring_write_multiple
>   drm/amdgpu:fix missing programing critical registers
>   drm/amdgpu:changes in gfx DMAframe scheme
>   drm/amdgpu:change emit_frame_size of gfx8
> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  8 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  6 +--
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 77
> +++++++++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c    | 57 ++++++++++++++++++----
> -
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 37 ++++++++++-----
>  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c |  9 ++++
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 28 ++++++++----
>  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  |  9 ++++
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 43 +++++++++++++-----
>  drivers/gpu/drm/amd/amdgpu/soc15.c       |  7 +++
>  12 files changed, 206 insertions(+), 81 deletions(-)
> 
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 01/13] drm/amdgpu:imple cond_exec for gfx8
       [not found]     ` <1490351912-10122-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 14:38       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-24 14:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 01/13] drm/amdgpu:imple cond_exec for gfx8
> 
> when MCBP enalbed for gfx8, the cond_exec must also
> be implemented, otherwise there will be odds to meet
> cross engine (ce and me) deadlock when WORLD switch happens.
> 
> Change-Id: I6bdb5f91dc6e1b56dcad43741a109a6eb08cb5fa
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 28
> ++++++++++++++++++++++++++++
>  1 file changed, 28 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 5757300..396c075 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6499,6 +6499,32 @@ static void gfx_v8_ring_emit_cntxcntl(struct
> amdgpu_ring *ring, uint32_t flags)
>  			(flags & AMDGPU_VM_DOMAIN) ?
> AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
>  }
> 
> +static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring
> *ring)
> +{
> +	unsigned ret;

New line between stack variables can code please.

> +	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
> +	amdgpu_ring_write(ring, lower_32_bits(ring-
> >cond_exe_gpu_addr));
> +	amdgpu_ring_write(ring, upper_32_bits(ring-
> >cond_exe_gpu_addr));
> +	amdgpu_ring_write(ring, 0); /* discard following DWs if
> *cond_exec_gpu_addr==0 */
> +	ret = ring->wptr & ring->buf_mask;
> +	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later
> */
> +	return ret;
> +}
> +
> +static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring
> *ring, unsigned offset)
> +{
> +	unsigned cur;

Same thing here.

With these fixed:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> +	BUG_ON(offset > ring->buf_mask);
> +	BUG_ON(ring->ring[offset] != 0x55aa55aa);
> +
> +	cur = (ring->wptr & ring->buf_mask) - 1;
> +	if (likely(cur > offset))
> +		ring->ring[offset] = cur - offset;
> +	else
> +		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
> +}
> +
> +
>  static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
>  {
>  	struct amdgpu_device *adev = ring->adev;
> @@ -6788,6 +6814,8 @@ static const struct amdgpu_ring_funcs
> gfx_v8_0_ring_funcs_gfx = {
>  	.pad_ib = amdgpu_ring_generic_pad_ib,
>  	.emit_switch_buffer = gfx_v8_ring_emit_sb,
>  	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
> +	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
> +	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
>  };
> 
>  static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 02/13] drm/amdgpu:enable mcbp for gfx9
       [not found]     ` <1490351912-10122-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 14:38       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-24 14:38 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 02/13] drm/amdgpu:enable mcbp for gfx9
> 
> set bit 21 of IB.control filed to actually enable
> MCBP for SRIOV
> 
> Change-Id: Ie5126d5be95e037087cf7167c28c61975f40d784
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad82ab7..0d8fb51 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3016,6 +3016,9 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct
> amdgpu_ring *ring,
> 
>          control |= ib->length_dw | (vm_id << 24);
> 
> +		if (amdgpu_sriov_vf(ring->adev) && (ib->flags &
> AMDGPU_IB_FLAG_PREEMPT))
> +			control |= (1 << 21);

Can you add proper defines for these bits?

With that fixed:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> +
>          amdgpu_ring_write(ring, header);
>  	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
>          amdgpu_ring_write(ring,
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 03/13] drm/amdgpu:add KIQ interrupt id
       [not found]     ` <1490351912-10122-4-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 14:40       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-24 14:40 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 03/13] drm/amdgpu:add KIQ interrupt id
> 
> 178 id is for GENERIC2 interrupt which is used by KIQ
> 
> Change-Id: I9aa9a90a97b33aec8ca8207d1814eaecdc92667d
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +++++
>  1 file changed, 5 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 0d8fb51..ce6fa03 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -1023,6 +1023,11 @@ static int gfx_v9_0_sw_init(void *handle)
>  	struct amdgpu_kiq *kiq;
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
> +	/* KIQ event */
> +	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP,
> 178, &adev->gfx.kiq.irq);
> +	if (r)
> +		return r;
> +
>  	/* EOP Event */
>  	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP,
> 181, &adev->gfx.eop_irq);
>  	if (r)
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 04/13] drm/amdgpu:virt_init_setting invoke is missed!
       [not found]     ` <1490351912-10122-5-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 14:40       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-24 14:40 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 04/13] drm/amdgpu:virt_init_setting invoke is missed!
> 
> this must be invoked during early init
> 
> Change-Id: I68726dd36825259913b47493ba1e9c467b368d0c
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/soc15.c | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 7e54d9dc..4ebe94b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -553,6 +553,10 @@ static int soc15_common_early_init(void *handle)
>  		(amdgpu_ip_block_mask & (1 <<
> AMD_IP_BLOCK_TYPE_PSP)))
>  		psp_enabled = true;
> 
> +	if (amdgpu_sriov_vf(adev)) {
> +		amdgpu_virt_init_setting(adev);
> +	}
> +
>  	/*
>  	 * nbio need be used for both sdma and gfx9, but only
>  	 * initializes once
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 06/13] drm/amdgpu:change sequence of SDMA v4 init
       [not found]     ` <1490351912-10122-7-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 14:46       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-24 14:46 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 06/13] drm/amdgpu:change sequence of SDMA v4 init
> 
> must set minor_update.enable before write smaller value
> to wptr/doorbell, so for sriov we need set that register
> bit in hw_init period.
> 
> this could fix the SDMA ring test fail after guest reboot
> 
> Change-Id: Id863396788cc5b35550cdcac405131d41690e77a
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 37
> +++++++++++++++++++++++++---------
>  1 file changed, 27 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index ee3b4a9..4d9fec8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -560,8 +560,14 @@ static int sdma_v4_0_gfx_resume(struct
> amdgpu_device *adev)
>  		WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
> 
>  		ring->wptr = 0;
> -		WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> -		WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> +
> +		/* before programing wptr to a less value, need set
> minor_ptr_update first */
> +		WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
> +
> +		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register
> write for wptr */
> +			WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
> +			WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
> +		}
> 
>  		doorbell = RREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_DOORBELL));
>  		doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_DOORBELL_OFFSET));
> @@ -577,15 +583,23 @@ static int sdma_v4_0_gfx_resume(struct
> amdgpu_device *adev)
>  		WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
>  		nbio_v6_1_sdma_doorbell_range(adev, i, ring-
> >use_doorbell, ring->doorbell_index);
> 
> +		if (amdgpu_sriov_vf(adev))
> +			sdma_v4_0_ring_set_wptr(ring);
> +
> +		/* set minor_ptr_update to 0 after wptr programed */
> +		WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
> +
>  		/* set utc l1 enable flag always to 1 */
>  		temp = RREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_CNTL));
>  		temp = REG_SET_FIELD(temp, SDMA0_CNTL,
> UTC_L1_ENABLE, 1);
>  		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL),
> temp);
> 
> -		/* unhalt engine */
> -		temp = RREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_F32_CNTL));
> -		temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
> -		WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_F32_CNTL), temp);
> +		if (!amdgpu_sriov_vf(adev)) {
> +			/* unhalt engine */
> +			temp = RREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_F32_CNTL));
> +			temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL,
> HALT, 0);
> +			WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_F32_CNTL), temp);
> +		}
> 
>  		/* enable DMA RB */
>  		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
> RB_ENABLE, 1);
> @@ -601,6 +615,11 @@ static int sdma_v4_0_gfx_resume(struct
> amdgpu_device *adev)
> 
>  		ring->ready = true;
> 
> +		if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence
> doesn't need below to lines */
> +			sdma_v4_0_ctx_switch_enable(adev, true);
> +			sdma_v4_0_enable(adev, true);
> +		}
> +
>  		r = amdgpu_ring_test_ring(ring);
>  		if (r) {
>  			ring->ready = false;
> @@ -671,8 +690,6 @@ static int sdma_v4_0_load_microcode(struct
> amdgpu_device *adev)
>  			(adev->sdma.instance[i].fw->data +
>  				le32_to_cpu(hdr-
> >header.ucode_array_offset_bytes));
> 
> -		sdma_v4_0_print_ucode_regs(adev);
> -

This should be a separate change.  With that fixed:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

It might be nice to further unify these sequences if possible.  Some of these may not be required for bare metal, but as long as they don't hurt anything, I think it makes sense to reduce the number of code paths in general.

Alex


>  		WREG32(sdma_v4_0_get_reg_offset(i,
> mmSDMA0_UCODE_ADDR), 0);
> 
> 
> @@ -699,10 +716,10 @@ static int sdma_v4_0_load_microcode(struct
> amdgpu_device *adev)
>   */
>  static int sdma_v4_0_start(struct amdgpu_device *adev)
>  {
> -	int r;
> +	int r = 0;
> 
>  	if (amdgpu_sriov_vf(adev)) {
> -		/* disable RB and halt engine */
> +		sdma_v4_0_ctx_switch_enable(adev, false);
>  		sdma_v4_0_enable(adev, false);
> 
>  		/* set RB registers */
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 07/13] drm/amdgpu:two fixings for sdma v4 for SRIOV
       [not found]     ` <1490351912-10122-8-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 14:47       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-24 14:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 07/13] drm/amdgpu:two fixings for sdma v4 for SRIOV
> 
> no hw_fini for SRIOV, otherwise other VF will be affected
> no CG for SRIOV
> 
> Change-Id: I1b0525eb8d08754b4bd1a6ee6798bf5e41c6bc6b
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 6 ++++++
>  1 file changed, 6 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 4d9fec8..443f850 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -1172,6 +1172,9 @@ static int sdma_v4_0_hw_fini(void *handle)
>  {
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
> +	if (amdgpu_sriov_vf(adev))
> +		return 0;
> +
>  	sdma_v4_0_ctx_switch_enable(adev, false);
>  	sdma_v4_0_enable(adev, false);
> 
> @@ -1406,6 +1409,9 @@ static int sdma_v4_0_set_clockgating_state(void
> *handle,
>  {
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
> +	if (amdgpu_sriov_vf(adev))
> +		return 0;
> +
>  	switch (adev->asic_type) {
>  	case CHIP_VEGA10:
>  		sdma_v4_0_update_medium_grain_clock_gating(adev,
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 08/13] drm/amdgpu:no cg for soc15 of SRIOV
       [not found]     ` <1490351912-10122-9-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 14:47       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-24 14:47 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 08/13] drm/amdgpu:no cg for soc15 of SRIOV
> 
> no CG for SRIOV on SOC15
> 
> Change-Id: Ic17e99862a875de9bfc811c72d0ab627ba58d585
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/soc15.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 4ebe94b..509cd0a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -781,6 +781,9 @@ static int soc15_common_set_clockgating_state(void
> *handle,
>  {
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> 
> +	if (amdgpu_sriov_vf(adev))
> +		return 0;
> +
>  	switch (adev->asic_type) {
>  	case CHIP_VEGA10:
>  		nbio_v6_1_update_medium_grain_clock_gating(adev,
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 09/13] drm/amdgpu:fix gmc_v9 vm fault process for SRIOV
       [not found]     ` <1490351912-10122-10-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 14:49       ` Deucher, Alexander
  2017-03-27  1:51       ` Zhang, Jerry (Junwei)
  1 sibling, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-24 14:49 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 09/13] drm/amdgpu:fix gmc_v9 vm fault process for SRIOV
> 
> for SRIOV we cannot use access register when in IRQ routine
> with regular KIQ method
> 
> Change-Id: Ifae3164cf12311b851ae131f58175f6ec3174f82
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 51a1919..88221bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -138,20 +138,28 @@ static int gmc_v9_0_process_interrupt(struct
> amdgpu_device *adev,
>  	addr = (u64)entry->src_data[0] << 12;
>  	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
> 
> -	if (entry->vm_id_src) {
> -		status = RREG32(mmhub->vm_l2_pro_fault_status);
> -		WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
> -	} else {
> -		status = RREG32(gfxhub->vm_l2_pro_fault_status);
> -		WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
> -	}
> +	if (!amdgpu_sriov_vf(adev)) {
> +		if (entry->vm_id_src) {
> +			status = RREG32(mmhub->vm_l2_pro_fault_status);
> +			WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
> +		} else {
> +			status = RREG32(gfxhub->vm_l2_pro_fault_status);
> +			WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
> +		}
> 
> -	DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u
> pas_id:%u) "
> +		DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u
> vm_id:%u pas_id:%u) "
>  		  "at page 0x%016llx from %d\n"
>  		  "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>  		  entry->vm_id_src ? "mmhub" : "gfxhub",
>  		  entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
>  		  addr, entry->client_id, status);

Fix the indentation here.

> +	} else {
> +		DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u
> vm_id:%u pas_id:%u) "
> +		  "at page 0x%016llx from %d\n",
> +		  entry->vm_id_src ? "mmhub" : "gfxhub",
> +		  entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
> +		  addr, entry->client_id);

And here.  With that fixed:
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>


> +	}
> 
>  	return 0;
>  }
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 11/13] drm/amdgpu:fix missing programing critical registers
       [not found]     ` <1490351912-10122-12-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-24 14:50       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-24 14:50 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:39 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 11/13] drm/amdgpu:fix missing programing critical registers
> 
> those MC_VM registers won't be programed by VBIOS in VF
> so driver is responsible to programe them.
> 
> Change-Id: I817371346d86bd5668ac80a486dadc1605d0b6ca
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 9 +++++++++
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c    | 4 +++-
>  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 9 +++++++++
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> index 1ff019c..1d3c34d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
> @@ -53,6 +53,15 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device
> *adev)
> 
> 	mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
>  				(u32)(value >> 44));
> 
> +	if (amdgpu_sriov_vf(adev)) {
> +		/* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF,
> becuase they are VF copy registers so
> +		vbios post doesn't program them, for SRIOV driver need to
> program them */
> +		WREG32(SOC15_REG_OFFSET(GC, 0,
> mmMC_VM_FB_LOCATION_BASE),
> +				adev->mc.vram_start >> 24);
> +		WREG32(SOC15_REG_OFFSET(GC, 0,
> mmMC_VM_FB_LOCATION_TOP),
> +				adev->mc.vram_end >> 24);
> +	}
> +
>  	/* Disable AGP. */
>  	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0);
>  	WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 88221bb..d841bc9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -383,7 +383,9 @@ static int gmc_v9_0_late_init(void *handle)
>  static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
>  					struct amdgpu_mc *mc)
>  {
> -	u64 base = mmhub_v1_0_get_fb_location(adev);
> +	u64 base = 0;
> +	if (!amdgpu_sriov_vf(adev))
> +		base = mmhub_v1_0_get_fb_location(adev);
>  	amdgpu_vram_location(adev, &adev->mc, base);
>  	adev->mc.gtt_base_align = 0;
>  	amdgpu_gtt_location(adev, mc);
> diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> index b1e0e6b..12025d0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
> @@ -67,6 +67,15 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device
> *adev)
> 
> 	mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB),
>  				(u32)(value >> 44));
> 
> +	if (amdgpu_sriov_vf(adev)) {
> +		/* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF,
> becuase they are VF copy registers so
> +		vbios post doesn't program them, for SRIOV driver need to
> program them */
> +		WREG32(SOC15_REG_OFFSET(MMHUB, 0,
> mmMC_VM_FB_LOCATION_BASE),
> +			adev->mc.vram_start >> 24);
> +		WREG32(SOC15_REG_OFFSET(MMHUB, 0,
> mmMC_VM_FB_LOCATION_TOP),
> +			adev->mc.vram_end >> 24);
> +	}
> +
>  	/* Disable AGP. */
>  	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BASE),
> 0);
>  	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP),
> 0);
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 09/13] drm/amdgpu:fix gmc_v9 vm fault process for SRIOV
       [not found]     ` <1490351912-10122-10-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-24 14:49       ` Deucher, Alexander
@ 2017-03-27  1:51       ` Zhang, Jerry (Junwei)
       [not found]         ` <58D8700C.1030305-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 34+ messages in thread
From: Zhang, Jerry (Junwei) @ 2017-03-27  1:51 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 03/24/2017 06:38 PM, Monk Liu wrote:
> for SRIOV we cannot use access register when in IRQ routine
> with regular KIQ method
>
> Change-Id: Ifae3164cf12311b851ae131f58175f6ec3174f82
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 24 ++++++++++++++++--------
>   1 file changed, 16 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 51a1919..88221bb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -138,20 +138,28 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>   	addr = (u64)entry->src_data[0] << 12;
>   	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>
> -	if (entry->vm_id_src) {
> -		status = RREG32(mmhub->vm_l2_pro_fault_status);
> -		WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
> -	} else {
> -		status = RREG32(gfxhub->vm_l2_pro_fault_status);
> -		WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
> -	}
> +	if (!amdgpu_sriov_vf(adev)) {
> +		if (entry->vm_id_src) {
> +			status = RREG32(mmhub->vm_l2_pro_fault_status);
> +			WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
> +		} else {
> +			status = RREG32(gfxhub->vm_l2_pro_fault_status);
> +			WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
> +		}

Even though SRIOV don't use status info, is it needed to clear vm L2 fault cntl 
regs?

If not,
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

>
> -	DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) "
> +		DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) "
>   		  "at page 0x%016llx from %d\n"
>   		  "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>   		  entry->vm_id_src ? "mmhub" : "gfxhub",
>   		  entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
>   		  addr, entry->client_id, status);
> +	} else {
> +		DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) "
> +		  "at page 0x%016llx from %d\n",
> +		  entry->vm_id_src ? "mmhub" : "gfxhub",
> +		  entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
> +		  addr, entry->client_id);
> +	}
>
>   	return 0;
>   }
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 09/13] drm/amdgpu:fix gmc_v9 vm fault process for SRIOV
       [not found]         ` <58D8700C.1030305-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-27  6:39           ` Christian König
       [not found]             ` <56a0f0fb-42f7-9b1e-ffaa-caa142a84e1c-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 34+ messages in thread
From: Christian König @ 2017-03-27  6:39 UTC (permalink / raw)
  To: Zhang, Jerry (Junwei),
	Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 27.03.2017 um 03:51 schrieb Zhang, Jerry (Junwei):
> On 03/24/2017 06:38 PM, Monk Liu wrote:
>> for SRIOV we cannot use access register when in IRQ routine
>> with regular KIQ method
>>
>> Change-Id: Ifae3164cf12311b851ae131f58175f6ec3174f82
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 24 ++++++++++++++++--------
>>   1 file changed, 16 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index 51a1919..88221bb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -138,20 +138,28 @@ static int gmc_v9_0_process_interrupt(struct 
>> amdgpu_device *adev,
>>       addr = (u64)entry->src_data[0] << 12;
>>       addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>>
>> -    if (entry->vm_id_src) {
>> -        status = RREG32(mmhub->vm_l2_pro_fault_status);
>> -        WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
>> -    } else {
>> -        status = RREG32(gfxhub->vm_l2_pro_fault_status);
>> -        WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
>> -    }
>> +    if (!amdgpu_sriov_vf(adev)) {
>> +        if (entry->vm_id_src) {
>> +            status = RREG32(mmhub->vm_l2_pro_fault_status);
>> +            WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
>> +        } else {
>> +            status = RREG32(gfxhub->vm_l2_pro_fault_status);
>> +            WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
>> +        }
>
> Even though SRIOV don't use status info, is it needed to clear vm L2 
> fault cntl regs?

Actually it is forbidden to clear that register under SRIOV. So the 
answer is no we shouldn't clear it.

>
> If not,
> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com> as well.

Regards,
Christian.

>
>>
>> -    DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u 
>> pas_id:%u) "
>> +        DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u 
>> pas_id:%u) "
>>             "at page 0x%016llx from %d\n"
>>             "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>>             entry->vm_id_src ? "mmhub" : "gfxhub",
>>             entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
>>             addr, entry->client_id, status);
>> +    } else {
>> +        DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u 
>> pas_id:%u) "
>> +          "at page 0x%016llx from %d\n",
>> +          entry->vm_id_src ? "mmhub" : "gfxhub",
>> +          entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
>> +          addr, entry->client_id);
>> +    }
>>
>>       return 0;
>>   }
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 09/13] drm/amdgpu:fix gmc_v9 vm fault process for SRIOV
       [not found]             ` <56a0f0fb-42f7-9b1e-ffaa-caa142a84e1c-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-03-27  7:00               ` Zhang, Jerry (Junwei)
  0 siblings, 0 replies; 34+ messages in thread
From: Zhang, Jerry (Junwei) @ 2017-03-27  7:00 UTC (permalink / raw)
  To: Christian König, Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 03/27/2017 02:39 PM, Christian König wrote:
> Am 27.03.2017 um 03:51 schrieb Zhang, Jerry (Junwei):
>> On 03/24/2017 06:38 PM, Monk Liu wrote:
>>> for SRIOV we cannot use access register when in IRQ routine
>>> with regular KIQ method
>>>
>>> Change-Id: Ifae3164cf12311b851ae131f58175f6ec3174f82
>>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 24 ++++++++++++++++--------
>>>   1 file changed, 16 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index 51a1919..88221bb 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -138,20 +138,28 @@ static int gmc_v9_0_process_interrupt(struct
>>> amdgpu_device *adev,
>>>       addr = (u64)entry->src_data[0] << 12;
>>>       addr |= ((u64)entry->src_data[1] & 0xf) << 44;
>>>
>>> -    if (entry->vm_id_src) {
>>> -        status = RREG32(mmhub->vm_l2_pro_fault_status);
>>> -        WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
>>> -    } else {
>>> -        status = RREG32(gfxhub->vm_l2_pro_fault_status);
>>> -        WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
>>> -    }
>>> +    if (!amdgpu_sriov_vf(adev)) {
>>> +        if (entry->vm_id_src) {
>>> +            status = RREG32(mmhub->vm_l2_pro_fault_status);
>>> +            WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
>>> +        } else {
>>> +            status = RREG32(gfxhub->vm_l2_pro_fault_status);
>>> +            WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
>>> +        }
>>
>> Even though SRIOV don't use status info, is it needed to clear vm L2 fault
>> cntl regs?
>
> Actually it is forbidden to clear that register under SRIOV. So the answer is
> no we shouldn't clear it.

Thanks for your clarification.
That's fine.

Jerry

>
>>
>> If not,
>> Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
>
> Reviewed-by: Christian König <christian.koenig@amd.com> as well.
>
> Regards,
> Christian.
>
>>
>>>
>>> -    DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) "
>>> +        DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) "
>>>             "at page 0x%016llx from %d\n"
>>>             "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>>>             entry->vm_id_src ? "mmhub" : "gfxhub",
>>>             entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
>>>             addr, entry->client_id, status);
>>> +    } else {
>>> +        DRM_ERROR("[%s]VMC page fault (src_id:%u ring:%u vm_id:%u pas_id:%u) "
>>> +          "at page 0x%016llx from %d\n",
>>> +          entry->vm_id_src ? "mmhub" : "gfxhub",
>>> +          entry->src_id, entry->ring_id, entry->vm_id, entry->pas_id,
>>> +          addr, entry->client_id);
>>> +    }
>>>
>>>       return 0;
>>>   }
>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 05/13] drm/amdgpu:fix ring init sequence
       [not found]     ` <1490351912-10122-6-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-27 15:54       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-27 15:54 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 05/13] drm/amdgpu:fix ring init sequence
> 
> ring->buf_mask need be set prior to ring_clear_ring invoke
> and fix ring_clear_ring as well which should use buf_mask
> instead of ptr_mask
> 
> Change-Id: I7778a7afe27ac2bdedcaba1b0146582100602f9d
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 6 +++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +-
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index e619833..10e94d0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -235,6 +235,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev,
> struct amdgpu_ring *ring,
>  	ring->ring_size = roundup_pow_of_two(max_dw * 4 *
>  					     amdgpu_sched_hw_submission);
> 
> +	ring->buf_mask = (ring->ring_size / 4) - 1;
> +	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
> +		0xffffffffffffffff : ring->buf_mask;
>  	/* Allocate ring buffer */
>  	if (ring->ring_obj == NULL) {
>  		r = amdgpu_bo_create_kernel(adev, ring->ring_size,
> PAGE_SIZE,
> @@ -248,9 +251,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev,
> struct amdgpu_ring *ring,
>  		}
>  		amdgpu_ring_clear_ring(ring);
>  	}
> -	ring->buf_mask = (ring->ring_size / 4) - 1;
> -	ring->ptr_mask = ring->funcs->support_64bit_ptrs ?
> -		0xffffffffffffffff : ring->buf_mask;
> 
>  	ring->max_dw = max_dw;
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 9f57eda..5fd3dd1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -194,7 +194,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring);
>  static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
>  {
>  	int i = 0;
> -	while (i <= ring->ptr_mask)
> +	while (i <= ring->buf_mask)
>  		ring->ring[i++] = ring->funcs->nop;
> 
>  }
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 10/13] drm/amdgpu:fix ring_write_multiple
       [not found]     ` <1490351912-10122-11-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-27 15:54       ` Deucher, Alexander
  0 siblings, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-27 15:54 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:38 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 10/13] drm/amdgpu:fix ring_write_multiple
> 
> ring_write_multiple should use buf_mask instead of ptr_mask
> 
> Change-Id: Ia249b6a1a990a6c3cba5c4048de6d604bb91d0ef
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 2861aee..284af3a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1804,9 +1804,9 @@ static inline void
> amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
>  	if (ring->count_dw < count_dw) {
>  		DRM_ERROR("amdgpu: writing more dwords to the ring than
> expected!\n");
>  	} else {
> -		occupied = ring->wptr & ring->ptr_mask;
> +		occupied = ring->wptr & ring->buf_mask;
>  		dst = (void *)&ring->ring[occupied];
> -		chunk1 = ring->ptr_mask + 1 - occupied;
> +		chunk1 = ring->buf_mask + 1 - occupied;
>  		chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1;
>  		chunk2 = count_dw - chunk1;
>  		chunk1 <<= 2;
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* RE: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
       [not found]     ` <1490351912-10122-13-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-03-27 15:59       ` Deucher, Alexander
  2017-03-27 16:16       ` Christian König
  1 sibling, 0 replies; 34+ messages in thread
From: Deucher, Alexander @ 2017-03-27 15:59 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Liu, Monk

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Monk Liu
> Sent: Friday, March 24, 2017 6:39 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
> 
> 1) Adapt to vulkan:
> Now use double SWITCH BUFFER to replace the 128 nops w/a,
> because when vulkan introduced, umd can insert 7 ~ 16 IBs
> per submit which makes 256 DW size cannot hold the whole
> DMAframe (if we still insert those 128 nops), CP team suggests
> use double SWITCH_BUFFERs, instead of tricky 128 NOPs w/a.
> 
> 2) To fix the CE VM fault issue when MCBP introduced:
> Need one more COND_EXEC wrapping IB part (original one us
> for VM switch part).
> 
> this change can fix vm fault issue caused by below scenario
> without this change:
> 
> >CE passed original COND_EXEC (no MCBP issued this moment),
>  proceed as normal.
> 
> >DE catch up to this COND_EXEC, but this time MCBP issued,
>  thus DE treats all following packages as NOP. The following
>  VM switch packages now looks just as NOP to DE, so DE
>  dosen't do VM flush at all.
> 
> >Now CE proceeds to the first IBc, and triggers VM fault,
>  because DE didn't do VM flush for this DMAframe.
> 
> 3) change estimated alloc size for gfx9.
> with new DMAframe scheme, we need modify emit_frame_size
> for gfx9
> 
> with above changes, no more 128 NOPs w/a after VM flush
> 
> Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

I haven't really kept up on this whole saga with the DE and CE so assuming there are no regressions on bare metal, patches 12, 13 are:
Acked-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77
> +++++++++++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 29 ++++++++-----
>  3 files changed, 69 insertions(+), 45 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index d103270..b300929 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
> unsigned num_ibs,
>  		return r;
>  	}
> 
> -	if (ring->funcs->init_cond_exec)
> -		patch_offset = amdgpu_ring_init_cond_exec(ring);
> -
>  	if (vm) {
>  		amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go
> too fast than DE */
> 
> @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
> unsigned num_ibs,
>  		}
>  	}
> 
> -	if (ring->funcs->emit_hdp_flush
> +	if (ring->funcs->init_cond_exec)
> +		patch_offset = amdgpu_ring_init_cond_exec(ring);
> +
> +		if (ring->funcs->emit_hdp_flush
>  #ifdef CONFIG_X86_64
>  	    && !(adev->flags & AMD_IS_APU)
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9ff445c..74be4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
> struct amdgpu_job *job)
>  		id->oa_size != job->oa_size);
>  	int r;
> 
> -	if (ring->funcs->emit_pipeline_sync && (
> -	    job->vm_needs_flush || gds_switch_needed ||
> -	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
> -		amdgpu_ring_emit_pipeline_sync(ring);
> +	if (job->vm_needs_flush || gds_switch_needed ||
> +		amdgpu_vm_is_gpu_reset(adev, id) ||
> +		amdgpu_vm_ring_has_compute_vm_bug(ring)) {
> +		unsigned patch_offset = 0;
> 
> -	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> -	    amdgpu_vm_is_gpu_reset(adev, id))) {
> -		struct fence *fence;
> -		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job-
> >vm_pd_addr);
> +		if (ring->funcs->init_cond_exec)
> +			patch_offset = amdgpu_ring_init_cond_exec(ring);
> 
> -		trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> -		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
> +		if (ring->funcs->emit_pipeline_sync &&
> +			(job->vm_needs_flush || gds_switch_needed ||
> +			amdgpu_vm_ring_has_compute_vm_bug(ring)))
> +			amdgpu_ring_emit_pipeline_sync(ring);
> 
> -		r = amdgpu_fence_emit(ring, &fence);
> -		if (r)
> -			return r;
> +		if (ring->funcs->emit_vm_flush && (job->vm_needs_flush
> ||
> +			amdgpu_vm_is_gpu_reset(adev, id))) {
> +			struct fence *fence;
> +			u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev,
> job->vm_pd_addr);
> 
> -		mutex_lock(&adev->vm_manager.lock);
> -		fence_put(id->last_flush);
> -		id->last_flush = fence;
> -		mutex_unlock(&adev->vm_manager.lock);
> -	}
> +			trace_amdgpu_vm_flush(pd_addr, ring->idx, job-
> >vm_id);
> +			amdgpu_ring_emit_vm_flush(ring, job->vm_id,
> pd_addr);
> 
> -	if (gds_switch_needed) {
> -		id->gds_base = job->gds_base;
> -		id->gds_size = job->gds_size;
> -		id->gws_base = job->gws_base;
> -		id->gws_size = job->gws_size;
> -		id->oa_base = job->oa_base;
> -		id->oa_size = job->oa_size;
> -		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> -					    job->gds_base, job->gds_size,
> -					    job->gws_base, job->gws_size,
> -					    job->oa_base, job->oa_size);
> -	}
> +			r = amdgpu_fence_emit(ring, &fence);
> +			if (r)
> +				return r;
> 
> +			mutex_lock(&adev->vm_manager.lock);
> +			fence_put(id->last_flush);
> +			id->last_flush = fence;
> +			mutex_unlock(&adev->vm_manager.lock);
> +		}
> +
> +		if (gds_switch_needed) {
> +			id->gds_base = job->gds_base;
> +			id->gds_size = job->gds_size;
> +			id->gws_base = job->gws_base;
> +			id->gws_size = job->gws_size;
> +			id->oa_base = job->oa_base;
> +			id->oa_size = job->oa_size;
> +			amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> +							job->gds_base, job-
> >gds_size,
> +							job->gws_base, job-
> >gws_size,
> +							job->oa_base, job-
> >oa_size);
> +		}
> +
> +		if (ring->funcs->patch_cond_exec)
> +			amdgpu_ring_patch_cond_exec(ring, patch_offset);
> +
> +		/* the double SWITCH_BUFFER here *cannot* be skipped by
> COND_EXEC */
> +		if (ring->funcs->emit_switch_buffer) {
> +			amdgpu_ring_emit_switch_buffer(ring);
> +			amdgpu_ring_emit_switch_buffer(ring);
> +		}
> +	}
>  	return 0;
>  }
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ce6fa03..eb8551b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct
> amdgpu_ring *ring,
>  		/* sync PFP to ME, otherwise we might get invalid PFP reads
> */
>  		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME,
> 0));
>  		amdgpu_ring_write(ring, 0x0);
> -		/* Emits 128 dw nop to prevent CE access VM before
> vm_flush finish */
> -		amdgpu_ring_insert_nop(ring, 128);
>  	}
>  }
> 
> @@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs
> gfx_v9_0_ring_funcs_gfx = {
>  	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
>  	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
>  	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
> -	.emit_frame_size =
> -		20 + /* gfx_v9_0_ring_emit_gds_switch */
> -		7 + /* gfx_v9_0_ring_emit_hdp_flush */
> -		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
> -		8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence,
> vm fence */
> -		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> -		128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
> -		2 + /* gfx_v9_ring_emit_sb */
> -		3, /* gfx_v9_ring_emit_cntxcntl */
> +	.emit_frame_size = /* totally 242 maximum if 16 IBs */
> +		5 +  /* COND_EXEC */
> +		7 +  /* PIPELINE_SYNC */
> +		46 + /* VM_FLUSH */
> +		8 +  /* FENCE for VM_FLUSH */
> +		20 + /* GDS switch */
> +		4 + /* double SWITCH_BUFFER,
> +		       the first COND_EXEC jump to the place just
> +			   prior to this double SWITCH_BUFFER  */
> +		5 + /* COND_EXEC */
> +		7 +	 /*	HDP_flush */
> +		4 +	 /*	VGT_flush */
> +		14 + /*	CE_META */
> +		31 + /*	DE_META */
> +		3 + /* CNTX_CTRL */
> +		5 + /* HDP_INVL */
> +		8 + 8 + /* FENCE x2 */
> +		2, /* SWITCH_BUFFER */
>  	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
>  	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
>  	.emit_fence = gfx_v9_0_ring_emit_fence,
> --
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
       [not found]     ` <1490351912-10122-13-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  2017-03-27 15:59       ` Deucher, Alexander
@ 2017-03-27 16:16       ` Christian König
       [not found]         ` <0befd76b-3656-af37-e820-8d84660cf20e-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  1 sibling, 1 reply; 34+ messages in thread
From: Christian König @ 2017-03-27 16:16 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 24.03.2017 um 11:38 schrieb Monk Liu:
> 1) Adapt to vulkan:
> Now use double SWITCH BUFFER to replace the 128 nops w/a,
> because when vulkan introduced, umd can insert 7 ~ 16 IBs
> per submit which makes 256 DW size cannot hold the whole
> DMAframe (if we still insert those 128 nops), CP team suggests
> use double SWITCH_BUFFERs, instead of tricky 128 NOPs w/a.
>
> 2) To fix the CE VM fault issue when MCBP introduced:
> Need one more COND_EXEC wrapping IB part (original one us
> for VM switch part).
>
> this change can fix vm fault issue caused by below scenario
> without this change:
>
>> CE passed original COND_EXEC (no MCBP issued this moment),
>   proceed as normal.
>
>> DE catch up to this COND_EXEC, but this time MCBP issued,
>   thus DE treats all following packages as NOP. The following
>   VM switch packages now looks just as NOP to DE, so DE
>   dosen't do VM flush at all.
>
>> Now CE proceeds to the first IBc, and triggers VM fault,
>   because DE didn't do VM flush for this DMAframe.
>
> 3) change estimated alloc size for gfx9.
> with new DMAframe scheme, we need modify emit_frame_size
> for gfx9
>
> with above changes, no more 128 NOPs w/a after VM flush

So we are back to square one? Cause that is exactly what we used to have 
which you replaced with the 128 NOPs.

>
> Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Sorry for the delay, have been busy with tracking down a hardware issue.

First of all please fix your editor to correctly indent your code. This 
file once more doesn't has correct indentation.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 +++++++++++++++++++++-------------
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 29 ++++++++-----
>   3 files changed, 69 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index d103270..b300929 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		return r;
>   	}
>   
> -	if (ring->funcs->init_cond_exec)
> -		patch_offset = amdgpu_ring_init_cond_exec(ring);
> -
>   	if (vm) {
>   		amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast than DE */
>   
> @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		}
>   	}
>   
> -	if (ring->funcs->emit_hdp_flush
> +	if (ring->funcs->init_cond_exec)
> +		patch_offset = amdgpu_ring_init_cond_exec(ring);
> +
> +		if (ring->funcs->emit_hdp_flush

Ok, so the conditional execution shouldn't cover the VM flush any more. 
That makes perfect sense.

Maybe split that up as separate patch? Would make reviewing and editing 
this one easier.

>   #ifdef CONFIG_X86_64
>   	    && !(adev->flags & AMD_IS_APU)
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9ff445c..74be4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>   		id->oa_size != job->oa_size);
>   	int r;
>   
> -	if (ring->funcs->emit_pipeline_sync && (
> -	    job->vm_needs_flush || gds_switch_needed ||
> -	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
> -		amdgpu_ring_emit_pipeline_sync(ring);
> +	if (job->vm_needs_flush || gds_switch_needed ||
> +		amdgpu_vm_is_gpu_reset(adev, id) ||
> +		amdgpu_vm_ring_has_compute_vm_bug(ring)) {
> +		unsigned patch_offset = 0;
>   
> -	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> -	    amdgpu_vm_is_gpu_reset(adev, id))) {
> -		struct fence *fence;
> -		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
> +		if (ring->funcs->init_cond_exec)
> +			patch_offset = amdgpu_ring_init_cond_exec(ring);

Ok, why are you inserting a cond_exec for the VM flush here?

>   
> -		trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> -		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
> +		if (ring->funcs->emit_pipeline_sync &&
> +			(job->vm_needs_flush || gds_switch_needed ||
> +			amdgpu_vm_ring_has_compute_vm_bug(ring)))
> +			amdgpu_ring_emit_pipeline_sync(ring);
>   
> -		r = amdgpu_fence_emit(ring, &fence);
> -		if (r)
> -			return r;
> +		if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> +			amdgpu_vm_is_gpu_reset(adev, id))) {
> +			struct fence *fence;
> +			u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
>   
> -		mutex_lock(&adev->vm_manager.lock);
> -		fence_put(id->last_flush);
> -		id->last_flush = fence;
> -		mutex_unlock(&adev->vm_manager.lock);
> -	}
> +			trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> +			amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
>   
> -	if (gds_switch_needed) {
> -		id->gds_base = job->gds_base;
> -		id->gds_size = job->gds_size;
> -		id->gws_base = job->gws_base;
> -		id->gws_size = job->gws_size;
> -		id->oa_base = job->oa_base;
> -		id->oa_size = job->oa_size;
> -		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> -					    job->gds_base, job->gds_size,
> -					    job->gws_base, job->gws_size,
> -					    job->oa_base, job->oa_size);
> -	}
> +			r = amdgpu_fence_emit(ring, &fence);
> +			if (r)
> +				return r;
>   
> +			mutex_lock(&adev->vm_manager.lock);
> +			fence_put(id->last_flush);
> +			id->last_flush = fence;
> +			mutex_unlock(&adev->vm_manager.lock);
> +		}
> +
> +		if (gds_switch_needed) {
> +			id->gds_base = job->gds_base;
> +			id->gds_size = job->gds_size;
> +			id->gws_base = job->gws_base;
> +			id->gws_size = job->gws_size;
> +			id->oa_base = job->oa_base;
> +			id->oa_size = job->oa_size;
> +			amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> +							job->gds_base, job->gds_size,
> +							job->gws_base, job->gws_size,
> +							job->oa_base, job->oa_size);
> +		}
> +
> +		if (ring->funcs->patch_cond_exec)
> +			amdgpu_ring_patch_cond_exec(ring, patch_offset);
> +
> +		/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
> +		if (ring->funcs->emit_switch_buffer) {
> +			amdgpu_ring_emit_switch_buffer(ring);
> +			amdgpu_ring_emit_switch_buffer(ring);
> +		}
> +	}
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ce6fa03..eb8551b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>   		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>   		amdgpu_ring_write(ring, 0x0);
> -		/* Emits 128 dw nop to prevent CE access VM before vm_flush finish */
> -		amdgpu_ring_insert_nop(ring, 128);

If this isn't needed any more you should remove that from gfx8 and gfx7 
as well.

Regards,
Christian.

>   	}
>   }
>   
> @@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>   	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
>   	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
>   	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
> -	.emit_frame_size =
> -		20 + /* gfx_v9_0_ring_emit_gds_switch */
> -		7 + /* gfx_v9_0_ring_emit_hdp_flush */
> -		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
> -		8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> -		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> -		128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
> -		2 + /* gfx_v9_ring_emit_sb */
> -		3, /* gfx_v9_ring_emit_cntxcntl */
> +	.emit_frame_size = /* totally 242 maximum if 16 IBs */
> +		5 +  /* COND_EXEC */
> +		7 +  /* PIPELINE_SYNC */
> +		46 + /* VM_FLUSH */
> +		8 +  /* FENCE for VM_FLUSH */
> +		20 + /* GDS switch */
> +		4 + /* double SWITCH_BUFFER,
> +		       the first COND_EXEC jump to the place just
> +			   prior to this double SWITCH_BUFFER  */
> +		5 + /* COND_EXEC */
> +		7 +	 /*	HDP_flush */
> +		4 +	 /*	VGT_flush */
> +		14 + /*	CE_META */
> +		31 + /*	DE_META */
> +		3 + /* CNTX_CTRL */
> +		5 + /* HDP_INVL */
> +		8 + 8 + /* FENCE x2 */
> +		2, /* SWITCH_BUFFER */
>   	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
>   	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
>   	.emit_fence = gfx_v9_0_ring_emit_fence,


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* 答复: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
       [not found]         ` <0befd76b-3656-af37-e820-8d84660cf20e-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-03-28  3:23           ` Liu, Monk
       [not found]             ` <DM5PR12MB16103525AF905E70A4BB6C0C84320-2J9CzHegvk++jCVTvoAFKAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 34+ messages in thread
From: Liu, Monk @ 2017-03-28  3:23 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Christian

For the 128 NOPs after VM flush, already removed for gfx8 & 7

BR  Monk

-----邮件原件-----
发件人: Christian König [mailto:deathsimple@vodafone.de] 
发送时间: Tuesday, March 28, 2017 12:17 AM
收件人: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
主题: Re: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme

Am 24.03.2017 um 11:38 schrieb Monk Liu:
> 1) Adapt to vulkan:
> Now use double SWITCH BUFFER to replace the 128 nops w/a, because when 
> vulkan introduced, umd can insert 7 ~ 16 IBs per submit which makes 
> 256 DW size cannot hold the whole DMAframe (if we still insert those 
> 128 nops), CP team suggests use double SWITCH_BUFFERs, instead of 
> tricky 128 NOPs w/a.
>
> 2) To fix the CE VM fault issue when MCBP introduced:
> Need one more COND_EXEC wrapping IB part (original one us for VM 
> switch part).
>
> this change can fix vm fault issue caused by below scenario without 
> this change:
>
>> CE passed original COND_EXEC (no MCBP issued this moment),
>   proceed as normal.
>
>> DE catch up to this COND_EXEC, but this time MCBP issued,
>   thus DE treats all following packages as NOP. The following
>   VM switch packages now looks just as NOP to DE, so DE
>   dosen't do VM flush at all.
>
>> Now CE proceeds to the first IBc, and triggers VM fault,
>   because DE didn't do VM flush for this DMAframe.
>
> 3) change estimated alloc size for gfx9.
> with new DMAframe scheme, we need modify emit_frame_size for gfx9
>
> with above changes, no more 128 NOPs w/a after VM flush

So we are back to square one? Cause that is exactly what we used to have which you replaced with the 128 NOPs.

>
> Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Sorry for the delay, have been busy with tracking down a hardware issue.

First of all please fix your editor to correctly indent your code. This file once more doesn't has correct indentation.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 +++++++++++++++++++++-------------
>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 29 ++++++++-----
>   3 files changed, 69 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index d103270..b300929 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		return r;
>   	}
>   
> -	if (ring->funcs->init_cond_exec)
> -		patch_offset = amdgpu_ring_init_cond_exec(ring);
> -
>   	if (vm) {
>   		amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast 
> than DE */
>   
> @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>   		}
>   	}
>   
> -	if (ring->funcs->emit_hdp_flush
> +	if (ring->funcs->init_cond_exec)
> +		patch_offset = amdgpu_ring_init_cond_exec(ring);
> +
> +		if (ring->funcs->emit_hdp_flush

Ok, so the conditional execution shouldn't cover the VM flush any more. 
That makes perfect sense.

Maybe split that up as separate patch? Would make reviewing and editing this one easier.

>   #ifdef CONFIG_X86_64
>   	    && !(adev->flags & AMD_IS_APU)
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 9ff445c..74be4fa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>   		id->oa_size != job->oa_size);
>   	int r;
>   
> -	if (ring->funcs->emit_pipeline_sync && (
> -	    job->vm_needs_flush || gds_switch_needed ||
> -	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
> -		amdgpu_ring_emit_pipeline_sync(ring);
> +	if (job->vm_needs_flush || gds_switch_needed ||
> +		amdgpu_vm_is_gpu_reset(adev, id) ||
> +		amdgpu_vm_ring_has_compute_vm_bug(ring)) {
> +		unsigned patch_offset = 0;
>   
> -	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> -	    amdgpu_vm_is_gpu_reset(adev, id))) {
> -		struct fence *fence;
> -		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
> +		if (ring->funcs->init_cond_exec)
> +			patch_offset = amdgpu_ring_init_cond_exec(ring);

Ok, why are you inserting a cond_exec for the VM flush here?

>   
> -		trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> -		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
> +		if (ring->funcs->emit_pipeline_sync &&
> +			(job->vm_needs_flush || gds_switch_needed ||
> +			amdgpu_vm_ring_has_compute_vm_bug(ring)))
> +			amdgpu_ring_emit_pipeline_sync(ring);
>   
> -		r = amdgpu_fence_emit(ring, &fence);
> -		if (r)
> -			return r;
> +		if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
> +			amdgpu_vm_is_gpu_reset(adev, id))) {
> +			struct fence *fence;
> +			u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
>   
> -		mutex_lock(&adev->vm_manager.lock);
> -		fence_put(id->last_flush);
> -		id->last_flush = fence;
> -		mutex_unlock(&adev->vm_manager.lock);
> -	}
> +			trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
> +			amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
>   
> -	if (gds_switch_needed) {
> -		id->gds_base = job->gds_base;
> -		id->gds_size = job->gds_size;
> -		id->gws_base = job->gws_base;
> -		id->gws_size = job->gws_size;
> -		id->oa_base = job->oa_base;
> -		id->oa_size = job->oa_size;
> -		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> -					    job->gds_base, job->gds_size,
> -					    job->gws_base, job->gws_size,
> -					    job->oa_base, job->oa_size);
> -	}
> +			r = amdgpu_fence_emit(ring, &fence);
> +			if (r)
> +				return r;
>   
> +			mutex_lock(&adev->vm_manager.lock);
> +			fence_put(id->last_flush);
> +			id->last_flush = fence;
> +			mutex_unlock(&adev->vm_manager.lock);
> +		}
> +
> +		if (gds_switch_needed) {
> +			id->gds_base = job->gds_base;
> +			id->gds_size = job->gds_size;
> +			id->gws_base = job->gws_base;
> +			id->gws_size = job->gws_size;
> +			id->oa_base = job->oa_base;
> +			id->oa_size = job->oa_size;
> +			amdgpu_ring_emit_gds_switch(ring, job->vm_id,
> +							job->gds_base, job->gds_size,
> +							job->gws_base, job->gws_size,
> +							job->oa_base, job->oa_size);
> +		}
> +
> +		if (ring->funcs->patch_cond_exec)
> +			amdgpu_ring_patch_cond_exec(ring, patch_offset);
> +
> +		/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
> +		if (ring->funcs->emit_switch_buffer) {
> +			amdgpu_ring_emit_switch_buffer(ring);
> +			amdgpu_ring_emit_switch_buffer(ring);
> +		}
> +	}
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ce6fa03..eb8551b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>   		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>   		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>   		amdgpu_ring_write(ring, 0x0);
> -		/* Emits 128 dw nop to prevent CE access VM before vm_flush finish */
> -		amdgpu_ring_insert_nop(ring, 128);

If this isn't needed any more you should remove that from gfx8 and gfx7 as well.

Regards,
Christian.

>   	}
>   }
>   
> @@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>   	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
>   	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
>   	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
> -	.emit_frame_size =
> -		20 + /* gfx_v9_0_ring_emit_gds_switch */
> -		7 + /* gfx_v9_0_ring_emit_hdp_flush */
> -		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
> -		8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
> -		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
> -		128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
> -		2 + /* gfx_v9_ring_emit_sb */
> -		3, /* gfx_v9_ring_emit_cntxcntl */
> +	.emit_frame_size = /* totally 242 maximum if 16 IBs */
> +		5 +  /* COND_EXEC */
> +		7 +  /* PIPELINE_SYNC */
> +		46 + /* VM_FLUSH */
> +		8 +  /* FENCE for VM_FLUSH */
> +		20 + /* GDS switch */
> +		4 + /* double SWITCH_BUFFER,
> +		       the first COND_EXEC jump to the place just
> +			   prior to this double SWITCH_BUFFER  */
> +		5 + /* COND_EXEC */
> +		7 +	 /*	HDP_flush */
> +		4 +	 /*	VGT_flush */
> +		14 + /*	CE_META */
> +		31 + /*	DE_META */
> +		3 + /* CNTX_CTRL */
> +		5 + /* HDP_INVL */
> +		8 + 8 + /* FENCE x2 */
> +		2, /* SWITCH_BUFFER */
>   	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
>   	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
>   	.emit_fence = gfx_v9_0_ring_emit_fence,


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* Re: 答复: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
       [not found]             ` <DM5PR12MB16103525AF905E70A4BB6C0C84320-2J9CzHegvk++jCVTvoAFKAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-03-28  8:33               ` Christian König
       [not found]                 ` <fc8681ca-4fa1-b75e-da70-7e573059cb85-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 34+ messages in thread
From: Christian König @ 2017-03-28  8:33 UTC (permalink / raw)
  To: Liu, Monk, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

No sure what version of the code your are working with, but at least on 
amd-staging-4.9 the 128 NOPs are still present for GFX8.

GFX7 uses the double switch buffer package and that can be removed if 
you add this in the vm_flush code.

Additional to that you haven't answered why we need the conditional 
execution for the vm flush?

Regards,
Christian.

Am 28.03.2017 um 05:23 schrieb Liu, Monk:
> Christian
>
> For the 128 NOPs after VM flush, already removed for gfx8 & 7
>
> BR  Monk
>
> -----邮件原件-----
> 发件人: Christian König [mailto:deathsimple@vodafone.de]
> 发送时间: Tuesday, March 28, 2017 12:17 AM
> 收件人: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> 主题: Re: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
>
> Am 24.03.2017 um 11:38 schrieb Monk Liu:
>> 1) Adapt to vulkan:
>> Now use double SWITCH BUFFER to replace the 128 nops w/a, because when
>> vulkan introduced, umd can insert 7 ~ 16 IBs per submit which makes
>> 256 DW size cannot hold the whole DMAframe (if we still insert those
>> 128 nops), CP team suggests use double SWITCH_BUFFERs, instead of
>> tricky 128 NOPs w/a.
>>
>> 2) To fix the CE VM fault issue when MCBP introduced:
>> Need one more COND_EXEC wrapping IB part (original one us for VM
>> switch part).
>>
>> this change can fix vm fault issue caused by below scenario without
>> this change:
>>
>>> CE passed original COND_EXEC (no MCBP issued this moment),
>>    proceed as normal.
>>
>>> DE catch up to this COND_EXEC, but this time MCBP issued,
>>    thus DE treats all following packages as NOP. The following
>>    VM switch packages now looks just as NOP to DE, so DE
>>    dosen't do VM flush at all.
>>
>>> Now CE proceeds to the first IBc, and triggers VM fault,
>>    because DE didn't do VM flush for this DMAframe.
>>
>> 3) change estimated alloc size for gfx9.
>> with new DMAframe scheme, we need modify emit_frame_size for gfx9
>>
>> with above changes, no more 128 NOPs w/a after VM flush
> So we are back to square one? Cause that is exactly what we used to have which you replaced with the 128 NOPs.
>
>> Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Sorry for the delay, have been busy with tracking down a hardware issue.
>
> First of all please fix your editor to correctly indent your code. This file once more doesn't has correct indentation.
>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++--
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 +++++++++++++++++++++-------------
>>    drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 29 ++++++++-----
>>    3 files changed, 69 insertions(+), 45 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index d103270..b300929 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>    		return r;
>>    	}
>>    
>> -	if (ring->funcs->init_cond_exec)
>> -		patch_offset = amdgpu_ring_init_cond_exec(ring);
>> -
>>    	if (vm) {
>>    		amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too fast
>> than DE */
>>    
>> @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>    		}
>>    	}
>>    
>> -	if (ring->funcs->emit_hdp_flush
>> +	if (ring->funcs->init_cond_exec)
>> +		patch_offset = amdgpu_ring_init_cond_exec(ring);
>> +
>> +		if (ring->funcs->emit_hdp_flush
> Ok, so the conditional execution shouldn't cover the VM flush any more.
> That makes perfect sense.
>
> Maybe split that up as separate patch? Would make reviewing and editing this one easier.
>
>>    #ifdef CONFIG_X86_64
>>    	    && !(adev->flags & AMD_IS_APU)
>>    #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9ff445c..74be4fa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>>    		id->oa_size != job->oa_size);
>>    	int r;
>>    
>> -	if (ring->funcs->emit_pipeline_sync && (
>> -	    job->vm_needs_flush || gds_switch_needed ||
>> -	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
>> -		amdgpu_ring_emit_pipeline_sync(ring);
>> +	if (job->vm_needs_flush || gds_switch_needed ||
>> +		amdgpu_vm_is_gpu_reset(adev, id) ||
>> +		amdgpu_vm_ring_has_compute_vm_bug(ring)) {
>> +		unsigned patch_offset = 0;
>>    
>> -	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
>> -	    amdgpu_vm_is_gpu_reset(adev, id))) {
>> -		struct fence *fence;
>> -		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
>> +		if (ring->funcs->init_cond_exec)
>> +			patch_offset = amdgpu_ring_init_cond_exec(ring);
> Ok, why are you inserting a cond_exec for the VM flush here?
>
>>    
>> -		trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
>> -		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
>> +		if (ring->funcs->emit_pipeline_sync &&
>> +			(job->vm_needs_flush || gds_switch_needed ||
>> +			amdgpu_vm_ring_has_compute_vm_bug(ring)))
>> +			amdgpu_ring_emit_pipeline_sync(ring);
>>    
>> -		r = amdgpu_fence_emit(ring, &fence);
>> -		if (r)
>> -			return r;
>> +		if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
>> +			amdgpu_vm_is_gpu_reset(adev, id))) {
>> +			struct fence *fence;
>> +			u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
>>    
>> -		mutex_lock(&adev->vm_manager.lock);
>> -		fence_put(id->last_flush);
>> -		id->last_flush = fence;
>> -		mutex_unlock(&adev->vm_manager.lock);
>> -	}
>> +			trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
>> +			amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
>>    
>> -	if (gds_switch_needed) {
>> -		id->gds_base = job->gds_base;
>> -		id->gds_size = job->gds_size;
>> -		id->gws_base = job->gws_base;
>> -		id->gws_size = job->gws_size;
>> -		id->oa_base = job->oa_base;
>> -		id->oa_size = job->oa_size;
>> -		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
>> -					    job->gds_base, job->gds_size,
>> -					    job->gws_base, job->gws_size,
>> -					    job->oa_base, job->oa_size);
>> -	}
>> +			r = amdgpu_fence_emit(ring, &fence);
>> +			if (r)
>> +				return r;
>>    
>> +			mutex_lock(&adev->vm_manager.lock);
>> +			fence_put(id->last_flush);
>> +			id->last_flush = fence;
>> +			mutex_unlock(&adev->vm_manager.lock);
>> +		}
>> +
>> +		if (gds_switch_needed) {
>> +			id->gds_base = job->gds_base;
>> +			id->gds_size = job->gds_size;
>> +			id->gws_base = job->gws_base;
>> +			id->gws_size = job->gws_size;
>> +			id->oa_base = job->oa_base;
>> +			id->oa_size = job->oa_size;
>> +			amdgpu_ring_emit_gds_switch(ring, job->vm_id,
>> +							job->gds_base, job->gds_size,
>> +							job->gws_base, job->gws_size,
>> +							job->oa_base, job->oa_size);
>> +		}
>> +
>> +		if (ring->funcs->patch_cond_exec)
>> +			amdgpu_ring_patch_cond_exec(ring, patch_offset);
>> +
>> +		/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>> +		if (ring->funcs->emit_switch_buffer) {
>> +			amdgpu_ring_emit_switch_buffer(ring);
>> +			amdgpu_ring_emit_switch_buffer(ring);
>> +		}
>> +	}
>>    	return 0;
>>    }
>>    
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index ce6fa03..eb8551b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>    		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>>    		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>>    		amdgpu_ring_write(ring, 0x0);
>> -		/* Emits 128 dw nop to prevent CE access VM before vm_flush finish */
>> -		amdgpu_ring_insert_nop(ring, 128);
> If this isn't needed any more you should remove that from gfx8 and gfx7 as well.
>
> Regards,
> Christian.
>
>>    	}
>>    }
>>    
>> @@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>>    	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
>>    	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
>>    	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
>> -	.emit_frame_size =
>> -		20 + /* gfx_v9_0_ring_emit_gds_switch */
>> -		7 + /* gfx_v9_0_ring_emit_hdp_flush */
>> -		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
>> -		8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
>> -		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>> -		128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
>> -		2 + /* gfx_v9_ring_emit_sb */
>> -		3, /* gfx_v9_ring_emit_cntxcntl */
>> +	.emit_frame_size = /* totally 242 maximum if 16 IBs */
>> +		5 +  /* COND_EXEC */
>> +		7 +  /* PIPELINE_SYNC */
>> +		46 + /* VM_FLUSH */
>> +		8 +  /* FENCE for VM_FLUSH */
>> +		20 + /* GDS switch */
>> +		4 + /* double SWITCH_BUFFER,
>> +		       the first COND_EXEC jump to the place just
>> +			   prior to this double SWITCH_BUFFER  */
>> +		5 + /* COND_EXEC */
>> +		7 +	 /*	HDP_flush */
>> +		4 +	 /*	VGT_flush */
>> +		14 + /*	CE_META */
>> +		31 + /*	DE_META */
>> +		3 + /* CNTX_CTRL */
>> +		5 + /* HDP_INVL */
>> +		8 + 8 + /* FENCE x2 */
>> +		2, /* SWITCH_BUFFER */
>>    	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
>>    	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
>>    	.emit_fence = gfx_v9_0_ring_emit_fence,
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

* 答复: 答复: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
       [not found]                 ` <fc8681ca-4fa1-b75e-da70-7e573059cb85-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-03-28  8:42                   ` Liu, Monk
  0 siblings, 0 replies; 34+ messages in thread
From: Liu, Monk @ 2017-03-28  8:42 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

No sure what version of the code your are working with, but at least on
amd-staging-4.9 the 128 NOPs are still present for GFX8.
[ml] the following patch will remove the 128 NOPs after vm flush for gfx8

GFX7 uses the double switch buffer package and that can be removed if you add this in the vm_flush code.
[ML] GFX7 doesn't support SRIOV, so we don't set the "switch_buffer" function member " emit_switch_buffer" to its ring structure, thus it need manually insert switch_buffers in gfx_v7_0_vm_flush() 
But for gfx 8 and gfx9, because they support SRIOV, so we set the function member "emit_switch_buffer", and the ib_schedule() will know it and will insert it at bottom of each frame.
For amdgpu_vm_flush(), it will only insert SWITCH_BUFFER when the gfx ring has " emit_switch_buffer" member, so amdgpu_vm_flush() actually behaves the same for gfx7 and gfx6, only affect behave of gfx8 & gfx9, on SRIOV case.

Additional to that you haven't answered why we need the conditional execution for the vm flush?

[ML]I already answered the question, in git log message:
>> this change can fix vm fault issue caused by below scenario without 
>> this change:
>>
>>> CE passed original COND_EXEC (no MCBP issued this moment),
>>    proceed as normal.
>>
>>> DE catch up to this COND_EXEC, but this time MCBP issued,
>>    thus DE treats all following packages as NOP. The following
>>    VM switch packages now looks just as NOP to DE, so DE
>>    dosen't do VM flush at all.
>>
>>> Now CE proceeds to the first IBc, and triggers VM fault,
>>    because DE didn't do VM flush for this DMAframe.

With this new CE wrapping around IB, that can avoid above corner case.


-----邮件原件-----
发件人: Christian König [mailto:deathsimple@vodafone.de] 
发送时间: Tuesday, March 28, 2017 4:34 PM
收件人: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
主题: Re: 答复: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme

No sure what version of the code your are working with, but at least on
amd-staging-4.9 the 128 NOPs are still present for GFX8.

GFX7 uses the double switch buffer package and that can be removed if you add this in the vm_flush code.

Additional to that you haven't answered why we need the conditional execution for the vm flush?

Regards,
Christian.

Am 28.03.2017 um 05:23 schrieb Liu, Monk:
> Christian
>
> For the 128 NOPs after VM flush, already removed for gfx8 & 7
>
> BR  Monk
>
> -----邮件原件-----
> 发件人: Christian König [mailto:deathsimple@vodafone.de]
> 发送时间: Tuesday, March 28, 2017 12:17 AM
> 收件人: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> 主题: Re: [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme
>
> Am 24.03.2017 um 11:38 schrieb Monk Liu:
>> 1) Adapt to vulkan:
>> Now use double SWITCH BUFFER to replace the 128 nops w/a, because 
>> when vulkan introduced, umd can insert 7 ~ 16 IBs per submit which 
>> makes
>> 256 DW size cannot hold the whole DMAframe (if we still insert those
>> 128 nops), CP team suggests use double SWITCH_BUFFERs, instead of 
>> tricky 128 NOPs w/a.
>>
>> 2) To fix the CE VM fault issue when MCBP introduced:
>> Need one more COND_EXEC wrapping IB part (original one us for VM 
>> switch part).
>>
>> this change can fix vm fault issue caused by below scenario without 
>> this change:
>>
>>> CE passed original COND_EXEC (no MCBP issued this moment),
>>    proceed as normal.
>>
>>> DE catch up to this COND_EXEC, but this time MCBP issued,
>>    thus DE treats all following packages as NOP. The following
>>    VM switch packages now looks just as NOP to DE, so DE
>>    dosen't do VM flush at all.
>>
>>> Now CE proceeds to the first IBc, and triggers VM fault,
>>    because DE didn't do VM flush for this DMAframe.
>>
>> 3) change estimated alloc size for gfx9.
>> with new DMAframe scheme, we need modify emit_frame_size for gfx9
>>
>> with above changes, no more 128 NOPs w/a after VM flush
> So we are back to square one? Cause that is exactly what we used to have which you replaced with the 128 NOPs.
>
>> Change-Id: Ib3f92d9d5a81bfff0369a00f23e1e5891797089a
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Sorry for the delay, have been busy with tracking down a hardware issue.
>
> First of all please fix your editor to correctly indent your code. This file once more doesn't has correct indentation.
>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++--
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 +++++++++++++++++++++-------------
>>    drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c  | 29 ++++++++-----
>>    3 files changed, 69 insertions(+), 45 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> index d103270..b300929 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
>> @@ -167,9 +167,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>    		return r;
>>    	}
>>    
>> -	if (ring->funcs->init_cond_exec)
>> -		patch_offset = amdgpu_ring_init_cond_exec(ring);
>> -
>>    	if (vm) {
>>    		amdgpu_ring_insert_nop(ring, extra_nop); /* prevent CE go too 
>> fast than DE */
>>    
>> @@ -180,7 +177,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>>    		}
>>    	}
>>    
>> -	if (ring->funcs->emit_hdp_flush
>> +	if (ring->funcs->init_cond_exec)
>> +		patch_offset = amdgpu_ring_init_cond_exec(ring);
>> +
>> +		if (ring->funcs->emit_hdp_flush
> Ok, so the conditional execution shouldn't cover the VM flush any more.
> That makes perfect sense.
>
> Maybe split that up as separate patch? Would make reviewing and editing this one easier.
>
>>    #ifdef CONFIG_X86_64
>>    	    && !(adev->flags & AMD_IS_APU)
>>    #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 9ff445c..74be4fa 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -483,42 +483,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
>>    		id->oa_size != job->oa_size);
>>    	int r;
>>    
>> -	if (ring->funcs->emit_pipeline_sync && (
>> -	    job->vm_needs_flush || gds_switch_needed ||
>> -	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
>> -		amdgpu_ring_emit_pipeline_sync(ring);
>> +	if (job->vm_needs_flush || gds_switch_needed ||
>> +		amdgpu_vm_is_gpu_reset(adev, id) ||
>> +		amdgpu_vm_ring_has_compute_vm_bug(ring)) {
>> +		unsigned patch_offset = 0;
>>    
>> -	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
>> -	    amdgpu_vm_is_gpu_reset(adev, id))) {
>> -		struct fence *fence;
>> -		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
>> +		if (ring->funcs->init_cond_exec)
>> +			patch_offset = amdgpu_ring_init_cond_exec(ring);
> Ok, why are you inserting a cond_exec for the VM flush here?
>
>>    
>> -		trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
>> -		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
>> +		if (ring->funcs->emit_pipeline_sync &&
>> +			(job->vm_needs_flush || gds_switch_needed ||
>> +			amdgpu_vm_ring_has_compute_vm_bug(ring)))
>> +			amdgpu_ring_emit_pipeline_sync(ring);
>>    
>> -		r = amdgpu_fence_emit(ring, &fence);
>> -		if (r)
>> -			return r;
>> +		if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
>> +			amdgpu_vm_is_gpu_reset(adev, id))) {
>> +			struct fence *fence;
>> +			u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
>>    
>> -		mutex_lock(&adev->vm_manager.lock);
>> -		fence_put(id->last_flush);
>> -		id->last_flush = fence;
>> -		mutex_unlock(&adev->vm_manager.lock);
>> -	}
>> +			trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
>> +			amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
>>    
>> -	if (gds_switch_needed) {
>> -		id->gds_base = job->gds_base;
>> -		id->gds_size = job->gds_size;
>> -		id->gws_base = job->gws_base;
>> -		id->gws_size = job->gws_size;
>> -		id->oa_base = job->oa_base;
>> -		id->oa_size = job->oa_size;
>> -		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
>> -					    job->gds_base, job->gds_size,
>> -					    job->gws_base, job->gws_size,
>> -					    job->oa_base, job->oa_size);
>> -	}
>> +			r = amdgpu_fence_emit(ring, &fence);
>> +			if (r)
>> +				return r;
>>    
>> +			mutex_lock(&adev->vm_manager.lock);
>> +			fence_put(id->last_flush);
>> +			id->last_flush = fence;
>> +			mutex_unlock(&adev->vm_manager.lock);
>> +		}
>> +
>> +		if (gds_switch_needed) {
>> +			id->gds_base = job->gds_base;
>> +			id->gds_size = job->gds_size;
>> +			id->gws_base = job->gws_base;
>> +			id->gws_size = job->gws_size;
>> +			id->oa_base = job->oa_base;
>> +			id->oa_size = job->oa_size;
>> +			amdgpu_ring_emit_gds_switch(ring, job->vm_id,
>> +							job->gds_base, job->gds_size,
>> +							job->gws_base, job->gws_size,
>> +							job->oa_base, job->oa_size);
>> +		}
>> +
>> +		if (ring->funcs->patch_cond_exec)
>> +			amdgpu_ring_patch_cond_exec(ring, patch_offset);
>> +
>> +		/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
>> +		if (ring->funcs->emit_switch_buffer) {
>> +			amdgpu_ring_emit_switch_buffer(ring);
>> +			amdgpu_ring_emit_switch_buffer(ring);
>> +		}
>> +	}
>>    	return 0;
>>    }
>>    
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index ce6fa03..eb8551b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -3134,8 +3134,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>>    		/* sync PFP to ME, otherwise we might get invalid PFP reads */
>>    		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>>    		amdgpu_ring_write(ring, 0x0);
>> -		/* Emits 128 dw nop to prevent CE access VM before vm_flush finish */
>> -		amdgpu_ring_insert_nop(ring, 128);
> If this isn't needed any more you should remove that from gfx8 and gfx7 as well.
>
> Regards,
> Christian.
>
>>    	}
>>    }
>>    
>> @@ -3629,15 +3627,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>>    	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
>>    	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
>>    	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
>> -	.emit_frame_size =
>> -		20 + /* gfx_v9_0_ring_emit_gds_switch */
>> -		7 + /* gfx_v9_0_ring_emit_hdp_flush */
>> -		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
>> -		8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
>> -		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>> -		128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */
>> -		2 + /* gfx_v9_ring_emit_sb */
>> -		3, /* gfx_v9_ring_emit_cntxcntl */
>> +	.emit_frame_size = /* totally 242 maximum if 16 IBs */
>> +		5 +  /* COND_EXEC */
>> +		7 +  /* PIPELINE_SYNC */
>> +		46 + /* VM_FLUSH */
>> +		8 +  /* FENCE for VM_FLUSH */
>> +		20 + /* GDS switch */
>> +		4 + /* double SWITCH_BUFFER,
>> +		       the first COND_EXEC jump to the place just
>> +			   prior to this double SWITCH_BUFFER  */
>> +		5 + /* COND_EXEC */
>> +		7 +	 /*	HDP_flush */
>> +		4 +	 /*	VGT_flush */
>> +		14 + /*	CE_META */
>> +		31 + /*	DE_META */
>> +		3 + /* CNTX_CTRL */
>> +		5 + /* HDP_INVL */
>> +		8 + 8 + /* FENCE x2 */
>> +		2, /* SWITCH_BUFFER */
>>    	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
>>    	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
>>    	.emit_fence = gfx_v9_0_ring_emit_fence,
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 34+ messages in thread

end of thread, other threads:[~2017-03-28  8:42 UTC | newest]

Thread overview: 34+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-24 10:38 [PATCH 00/13] *** VEGA10 SRIOV PATCHES *** Monk Liu
     [not found] ` <1490351912-10122-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 10:38   ` [PATCH 01/13] drm/amdgpu:imple cond_exec for gfx8 Monk Liu
     [not found]     ` <1490351912-10122-2-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 14:38       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 02/13] drm/amdgpu:enable mcbp for gfx9 Monk Liu
     [not found]     ` <1490351912-10122-3-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 14:38       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 03/13] drm/amdgpu:add KIQ interrupt id Monk Liu
     [not found]     ` <1490351912-10122-4-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 14:40       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 04/13] drm/amdgpu:virt_init_setting invoke is missed! Monk Liu
     [not found]     ` <1490351912-10122-5-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 14:40       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 05/13] drm/amdgpu:fix ring init sequence Monk Liu
     [not found]     ` <1490351912-10122-6-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-27 15:54       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 06/13] drm/amdgpu:change sequence of SDMA v4 init Monk Liu
     [not found]     ` <1490351912-10122-7-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 14:46       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 07/13] drm/amdgpu:two fixings for sdma v4 for SRIOV Monk Liu
     [not found]     ` <1490351912-10122-8-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 14:47       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 08/13] drm/amdgpu:no cg for soc15 of SRIOV Monk Liu
     [not found]     ` <1490351912-10122-9-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 14:47       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 09/13] drm/amdgpu:fix gmc_v9 vm fault process for SRIOV Monk Liu
     [not found]     ` <1490351912-10122-10-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 14:49       ` Deucher, Alexander
2017-03-27  1:51       ` Zhang, Jerry (Junwei)
     [not found]         ` <58D8700C.1030305-5C7GfCeVMHo@public.gmane.org>
2017-03-27  6:39           ` Christian König
     [not found]             ` <56a0f0fb-42f7-9b1e-ffaa-caa142a84e1c-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-03-27  7:00               ` Zhang, Jerry (Junwei)
2017-03-24 10:38   ` [PATCH 10/13] drm/amdgpu:fix ring_write_multiple Monk Liu
     [not found]     ` <1490351912-10122-11-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-27 15:54       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 11/13] drm/amdgpu:fix missing programing critical registers Monk Liu
     [not found]     ` <1490351912-10122-12-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-24 14:50       ` Deucher, Alexander
2017-03-24 10:38   ` [PATCH 12/13] drm/amdgpu:changes in gfx DMAframe scheme Monk Liu
     [not found]     ` <1490351912-10122-13-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-03-27 15:59       ` Deucher, Alexander
2017-03-27 16:16       ` Christian König
     [not found]         ` <0befd76b-3656-af37-e820-8d84660cf20e-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-03-28  3:23           ` 答复: " Liu, Monk
     [not found]             ` <DM5PR12MB16103525AF905E70A4BB6C0C84320-2J9CzHegvk++jCVTvoAFKAdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-03-28  8:33               ` Christian König
     [not found]                 ` <fc8681ca-4fa1-b75e-da70-7e573059cb85-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-03-28  8:42                   ` 答复: " Liu, Monk
2017-03-24 10:38   ` [PATCH 13/13] drm/amdgpu:change emit_frame_size of gfx8 Monk Liu
2017-03-24 11:39   ` [PATCH 00/13] *** VEGA10 SRIOV PATCHES *** Yu, Xiangliang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.