All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 10:52 ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06 10:52 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Tuikov, Luben, Koenig,
	Christian, Huang, Ray, Huang, Shimmer
  Cc: Zhu, Changfeng

From: changzhu <Changfeng.Zhu@amd.com>

The GRBM register interface is now capable of bursting 1 cycle per
register wr->wr, wr->rd much faster than previous muticycle per
transaction done interface.  This has caused a problem where
status registers requiring HW to update have a 1 cycle delay, due
to the register update having to go through GRBM.

For cp ucode, it has realized dummy read in cp firmware.It covers
the use of WAIT_REG_MEM operation 1 case only.So it needs to call
gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
update firmware in case firmware is too old to have function to realize
dummy read in cp firmware.

For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
moved to gfxhub in gfx10. So it needs to add dummy read in driver
between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.

Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 459aa9059542..a74ecd449775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -267,6 +267,7 @@ struct amdgpu_gfx {
 	uint32_t			mec2_feature_version;
 	bool				mec_fw_write_wait;
 	bool				me_fw_write_wait;
+	bool				cp_fw_write_wait;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 17a5cbfd0024..c7a6f98bf6b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
 	kfree(adev->gfx.rlc.register_list_format);
 }
 
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+	adev->gfx.cp_fw_write_wait = false;
+
+	switch (adev->asic_type) {
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
+		if ((adev->gfx.me_fw_version >= 0x00000046) &&
+		    (adev->gfx.me_feature_version >= 27) &&
+		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
+		    (adev->gfx.pfp_feature_version >= 27) &&
+		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
+		    (adev->gfx.mec_feature_version >= 27))
+			adev->gfx.cp_fw_write_wait = true;
+		break;
+	default:
+		break;
+	}
+
+	if (adev->gfx.cp_fw_write_wait == false)
+		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+			      GRBM requires 1-cycle delay in cp firmware\n");
+}
+
+
 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 {
 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -829,6 +855,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 		}
 	}
 
+	gfx_v10_0_check_fw_write_wait(adev);
 out:
 	if (err) {
 		dev_err(adev->dev,
@@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						   uint32_t reg0, uint32_t reg1,
+						   uint32_t ref, uint32_t mask)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+	struct amdgpu_device *adev = ring->adev;
+	bool fw_version_ok = false;
+
+	fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+	if (fw_version_ok)
+		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+				       ref, mask, 0x20);
+	else
+		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+							   ref, mask);
+}
+
 static void
 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 				      uint32_t me, uint32_t pipe,
@@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_tmz = gfx_v10_0_ring_emit_tmz,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 	.emit_rreg = gfx_v10_0_ring_emit_rreg,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3b00bce14cfb..af2615ba52aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+					    hub->vm_inv_eng0_ack + eng,
+					    req, 1 << vmid);
 
 	return pd_addr;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 3460c00f3eaa..ec47542e21b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						   uint32_t reg0, uint32_t reg1,
+						   uint32_t ref, uint32_t mask)
+{
+	amdgpu_ring_emit_wreg(ring, reg0, ref);
+	/* wait for a cycle to reset vm_inv_eng*_ack */
+	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
 static int sdma_v5_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
 		/* sdma_v5_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
 		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
 	.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 	.pad_ib = sdma_v5_0_ring_pad_ib,
 	.emit_wreg = sdma_v5_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
 	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
 	.preempt_ib = sdma_v5_0_ring_preempt_ib,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 10:52 ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06 10:52 UTC (permalink / raw)
  To: amd-gfx, Tuikov,  Luben, Koenig, Christian, Huang, Ray, Huang, Shimmer
  Cc: Zhu, Changfeng

From: changzhu <Changfeng.Zhu@amd.com>

The GRBM register interface is now capable of bursting 1 cycle per
register wr->wr, wr->rd much faster than previous muticycle per
transaction done interface.  This has caused a problem where
status registers requiring HW to update have a 1 cycle delay, due
to the register update having to go through GRBM.

For cp ucode, it has realized dummy read in cp firmware.It covers
the use of WAIT_REG_MEM operation 1 case only.So it needs to call
gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
update firmware in case firmware is too old to have function to realize
dummy read in cp firmware.

For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
moved to gfxhub in gfx10. So it needs to add dummy read in driver
between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.

Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 459aa9059542..a74ecd449775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -267,6 +267,7 @@ struct amdgpu_gfx {
 	uint32_t			mec2_feature_version;
 	bool				mec_fw_write_wait;
 	bool				me_fw_write_wait;
+	bool				cp_fw_write_wait;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 17a5cbfd0024..c7a6f98bf6b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
 	kfree(adev->gfx.rlc.register_list_format);
 }
 
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+	adev->gfx.cp_fw_write_wait = false;
+
+	switch (adev->asic_type) {
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
+		if ((adev->gfx.me_fw_version >= 0x00000046) &&
+		    (adev->gfx.me_feature_version >= 27) &&
+		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
+		    (adev->gfx.pfp_feature_version >= 27) &&
+		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
+		    (adev->gfx.mec_feature_version >= 27))
+			adev->gfx.cp_fw_write_wait = true;
+		break;
+	default:
+		break;
+	}
+
+	if (adev->gfx.cp_fw_write_wait == false)
+		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+			      GRBM requires 1-cycle delay in cp firmware\n");
+}
+
+
 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 {
 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -829,6 +855,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
 		}
 	}
 
+	gfx_v10_0_check_fw_write_wait(adev);
 out:
 	if (err) {
 		dev_err(adev->dev,
@@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						   uint32_t reg0, uint32_t reg1,
+						   uint32_t ref, uint32_t mask)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+	struct amdgpu_device *adev = ring->adev;
+	bool fw_version_ok = false;
+
+	fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+	if (fw_version_ok)
+		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+				       ref, mask, 0x20);
+	else
+		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+							   ref, mask);
+}
+
 static void
 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 				      uint32_t me, uint32_t pipe,
@@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_tmz = gfx_v10_0_ring_emit_tmz,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 	.emit_rreg = gfx_v10_0_ring_emit_rreg,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3b00bce14cfb..af2615ba52aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+					    hub->vm_inv_eng0_ack + eng,
+					    req, 1 << vmid);
 
 	return pd_addr;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 3460c00f3eaa..ec47542e21b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						   uint32_t reg0, uint32_t reg1,
+						   uint32_t ref, uint32_t mask)
+{
+	amdgpu_ring_emit_wreg(ring, reg0, ref);
+	/* wait for a cycle to reset vm_inv_eng*_ack */
+	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
 static int sdma_v5_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
 		/* sdma_v5_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
 		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
 	.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 	.pad_ib = sdma_v5_0_ring_pad_ib,
 	.emit_wreg = sdma_v5_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
 	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
 	.preempt_ib = sdma_v5_0_ring_preempt_ib,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 12:48     ` Koenig, Christian
  0 siblings, 0 replies; 20+ messages in thread
From: Koenig, Christian @ 2019-11-06 12:48 UTC (permalink / raw)
  To: Zhu, Changfeng, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Tuikov,
	Luben, Huang, Ray, Huang, Shimmer

Am 06.11.19 um 11:52 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per
> register wr->wr, wr->rd much faster than previous muticycle per
> transaction done interface.  This has caused a problem where
> status registers requiring HW to update have a 1 cycle delay, due
> to the register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers
> the use of WAIT_REG_MEM operation 1 case only.So it needs to call
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
> update firmware in case firmware is too old to have function to realize
> dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
> moved to gfxhub in gfx10. So it needs to add dummy read in driver
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..c7a6f98bf6b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +			      GRBM requires 1-cycle delay in cp firmware\n");
> +}
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr;
> @@ -829,6 +855,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
>   		}
>   	}
>   
> +	gfx_v10_0_check_fw_write_wait(adev);
>   out:
>   	if (err) {
>   		dev_err(adev->dev,
> @@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				       ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe,
> @@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..af2615ba52aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +					    hub->vm_inv_eng0_ack + eng,
> +					    req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..ec47542e21b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask)
> +{
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
> +}
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 12:48     ` Koenig, Christian
  0 siblings, 0 replies; 20+ messages in thread
From: Koenig, Christian @ 2019-11-06 12:48 UTC (permalink / raw)
  To: Zhu, Changfeng, amd-gfx, Tuikov, Luben, Huang, Ray, Huang, Shimmer

Am 06.11.19 um 11:52 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per
> register wr->wr, wr->rd much faster than previous muticycle per
> transaction done interface.  This has caused a problem where
> status registers requiring HW to update have a 1 cycle delay, due
> to the register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers
> the use of WAIT_REG_MEM operation 1 case only.So it needs to call
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
> update firmware in case firmware is too old to have function to realize
> dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
> moved to gfxhub in gfx10. So it needs to add dummy read in driver
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..c7a6f98bf6b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +			      GRBM requires 1-cycle delay in cp firmware\n");
> +}
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr;
> @@ -829,6 +855,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
>   		}
>   	}
>   
> +	gfx_v10_0_check_fw_write_wait(adev);
>   out:
>   	if (err) {
>   		dev_err(adev->dev,
> @@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				       ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe,
> @@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..af2615ba52aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +					    hub->vm_inv_eng0_ack + eng,
> +					    req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..ec47542e21b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask)
> +{
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
> +}
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 12:50         ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06 12:50 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Tuikov, Luben, Huang, Ray, Huang, Shimmer

Thanks, Chris.

BR,
Changfeng.

-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com> 
Sent: Wednesday, November 6, 2019 8:48 PM
To: Zhu, Changfeng <Changfeng.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Am 06.11.19 um 11:52 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per 
> register wr->wr, wr->rd much faster than previous muticycle per 
> transaction done interface.  This has caused a problem where status 
> registers requiring HW to update have a 1 cycle delay, due to the 
> register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers the 
> use of WAIT_REG_MEM operation 1 case only.So it needs to call 
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning 
> to update firmware in case firmware is too old to have function to 
> realize dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma 
> is moved to gfxhub in gfx10. So it needs to add dummy read in driver 
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..c7a6f98bf6b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) 
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +			      GRBM requires 1-cycle delay in cp firmware\n"); }
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -829,6 +855,7 @@ 
> static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
>   		}
>   	}
>   
> +	gfx_v10_0_check_fw_write_wait(adev);
>   out:
>   	if (err) {
>   		dev_err(adev->dev,
> @@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask) {
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				       ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe, @@ -5158,6 +5203,7 @@ static 
> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = 
> { @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..af2615ba52aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +					    hub->vm_inv_eng0_ack + eng,
> +					    req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..ec47542e21b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask) {
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); }
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ 
> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 12:50         ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06 12:50 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx, Tuikov, Luben, Huang, Ray, Huang, Shimmer

Thanks, Chris.

BR,
Changfeng.

-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com> 
Sent: Wednesday, November 6, 2019 8:48 PM
To: Zhu, Changfeng <Changfeng.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Am 06.11.19 um 11:52 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per 
> register wr->wr, wr->rd much faster than previous muticycle per 
> transaction done interface.  This has caused a problem where status 
> registers requiring HW to update have a 1 cycle delay, due to the 
> register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers the 
> use of WAIT_REG_MEM operation 1 case only.So it needs to call 
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning 
> to update firmware in case firmware is too old to have function to 
> realize dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma 
> is moved to gfxhub in gfx10. So it needs to add dummy read in driver 
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..c7a6f98bf6b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) 
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +			      GRBM requires 1-cycle delay in cp firmware\n"); }
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -829,6 +855,7 @@ 
> static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
>   		}
>   	}
>   
> +	gfx_v10_0_check_fw_write_wait(adev);
>   out:
>   	if (err) {
>   		dev_err(adev->dev,
> @@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask) {
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				       ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe, @@ -5158,6 +5203,7 @@ static 
> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = 
> { @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..af2615ba52aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +					    hub->vm_inv_eng0_ack + eng,
> +					    req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..ec47542e21b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask) {
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); }
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ 
> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 13:07             ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06 13:07 UTC (permalink / raw)
  To: Zhu, Changfeng, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Tuikov, Luben, Huang,
	Ray, Huang, Shimmer



-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhu, Changfeng
Sent: Wednesday, November 6, 2019 8:50 PM
To: Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Thanks, Chris.
You give many good suggestions and guidance for me to finish this patch.

BR,
Changfeng.

-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com>
Sent: Wednesday, November 6, 2019 8:48 PM
To: Zhu, Changfeng <Changfeng.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Am 06.11.19 um 11:52 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per 
> register wr->wr, wr->rd much faster than previous muticycle per 
> transaction done interface.  This has caused a problem where status 
> registers requiring HW to update have a 1 cycle delay, due to the 
> register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers the 
> use of WAIT_REG_MEM operation 1 case only.So it needs to call 
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning 
> to update firmware in case firmware is too old to have function to 
> realize dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma 
> is moved to gfxhub in gfx10. So it needs to add dummy read in driver 
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..c7a6f98bf6b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) 
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +			      GRBM requires 1-cycle delay in cp firmware\n"); }
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -829,6 +855,7 @@ 
> static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
>   		}
>   	}
>   
> +	gfx_v10_0_check_fw_write_wait(adev);
>   out:
>   	if (err) {
>   		dev_err(adev->dev,
> @@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask) {
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				       ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe, @@ -5158,6 +5203,7 @@ static 
> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = 
> { @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..af2615ba52aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +					    hub->vm_inv_eng0_ack + eng,
> +					    req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..ec47542e21b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask) {
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); }
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 13:07             ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06 13:07 UTC (permalink / raw)
  To: Zhu, Changfeng, Koenig, Christian, amd-gfx, Tuikov, Luben, Huang,
	Ray, Huang, Shimmer



-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Zhu, Changfeng
Sent: Wednesday, November 6, 2019 8:50 PM
To: Koenig, Christian <Christian.Koenig@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Thanks, Chris.
You give many good suggestions and guidance for me to finish this patch.

BR,
Changfeng.

-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com>
Sent: Wednesday, November 6, 2019 8:48 PM
To: Zhu, Changfeng <Changfeng.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Am 06.11.19 um 11:52 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per 
> register wr->wr, wr->rd much faster than previous muticycle per 
> transaction done interface.  This has caused a problem where status 
> registers requiring HW to update have a 1 cycle delay, due to the 
> register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers the 
> use of WAIT_REG_MEM operation 1 case only.So it needs to call 
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning 
> to update firmware in case firmware is too old to have function to 
> realize dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma 
> is moved to gfxhub in gfx10. So it needs to add dummy read in driver 
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..c7a6f98bf6b8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) 
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +			      GRBM requires 1-cycle delay in cp firmware\n"); }
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -829,6 +855,7 @@ 
> static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
>   		}
>   	}
>   
> +	gfx_v10_0_check_fw_write_wait(adev);
>   out:
>   	if (err) {
>   		dev_err(adev->dev,
> @@ -4768,6 +4795,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask) {
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				       ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe, @@ -5158,6 +5203,7 @@ static 
> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = 
> { @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..af2615ba52aa 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +					    hub->vm_inv_eng0_ack + eng,
> +					    req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..ec47542e21b0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						   uint32_t reg0, uint32_t reg1,
> +						   uint32_t ref, uint32_t mask) {
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); }
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@
> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 10:50         ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06 10:50 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Tuikov, Luben, Huang, Ray, Huang, Shimmer

Hi Chris,

I move gfx_v10_0_check_fw_write_wait(adev);
to gfx_v10_0_init_microcode

BR,
Changfeng.

-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com> 
Sent: Wednesday, November 6, 2019 5:26 PM
To: Zhu, Changfeng <Changfeng.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Am 06.11.19 um 09:21 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per 
> register wr->wr, wr->rd much faster than previous muticycle per 
> transaction done interface.  This has caused a problem where status 
> registers requiring HW to update have a 1 cycle delay, due to the 
> register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers the 
> use of WAIT_REG_MEM operation 1 case only.So it needs to call 
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning 
> to update firmware in case firmware is too old to have function to 
> realize dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma 
> is moved to gfxhub in gfx10. So it needs to add dummy read in driver 
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..acdb0e4df9b4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) 
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +				  GRBM requires 1-cycle delay in cp firmware\n"); }
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -4768,6 +4794,25 
> @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						  uint32_t reg0, uint32_t reg1,
> +						  uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	gfx_v10_0_check_fw_write_wait(adev);

Doing that here is invalid, the function can be called concurrently from multiple threads.

You either need to do this only once from some of the _init() callbacks or drop the adev->gfx.cp_fw_write_wait variable and instead return the result here.

> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				      ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe, @@ -5158,6 +5203,7 @@ static 
> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = 
> { @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..22c807309a22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +						hub->vm_inv_eng0_ack + eng,
> +						req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..7b15ddc739e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +												 uint32_t reg0, uint32_t reg1,
> +												 uint32_t ref, uint32_t mask)

BTW: You seem to have some odd tab configuration in your editor.

Tabs should be 8 spaces and for function parameters the next line should be starting behind the "(" above.

Regards,
Christian.

> +{
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); }
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ 
> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06 10:50         ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06 10:50 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx, Tuikov, Luben, Huang, Ray, Huang, Shimmer

Hi Chris,

I move gfx_v10_0_check_fw_write_wait(adev);
to gfx_v10_0_init_microcode

BR,
Changfeng.

-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com> 
Sent: Wednesday, November 6, 2019 5:26 PM
To: Zhu, Changfeng <Changfeng.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Am 06.11.19 um 09:21 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per 
> register wr->wr, wr->rd much faster than previous muticycle per 
> transaction done interface.  This has caused a problem where status 
> registers requiring HW to update have a 1 cycle delay, due to the 
> register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers the 
> use of WAIT_REG_MEM operation 1 case only.So it needs to call 
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning 
> to update firmware in case firmware is too old to have function to 
> realize dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma 
> is moved to gfxhub in gfx10. So it needs to add dummy read in driver 
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..acdb0e4df9b4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) 
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +				  GRBM requires 1-cycle delay in cp firmware\n"); }
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -4768,6 +4794,25 
> @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						  uint32_t reg0, uint32_t reg1,
> +						  uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	gfx_v10_0_check_fw_write_wait(adev);

Doing that here is invalid, the function can be called concurrently from multiple threads.

You either need to do this only once from some of the _init() callbacks or drop the adev->gfx.cp_fw_write_wait variable and instead return the result here.

> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				      ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe, @@ -5158,6 +5203,7 @@ static 
> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = 
> { @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..22c807309a22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +						hub->vm_inv_eng0_ack + eng,
> +						req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..7b15ddc739e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +												 uint32_t reg0, uint32_t reg1,
> +												 uint32_t ref, uint32_t mask)

BTW: You seem to have some odd tab configuration in your editor.

Tabs should be 8 spaces and for function parameters the next line should be starting behind the "(" above.

Regards,
Christian.

> +{
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); }
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ 
> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06  9:26     ` Koenig, Christian
  0 siblings, 0 replies; 20+ messages in thread
From: Koenig, Christian @ 2019-11-06  9:26 UTC (permalink / raw)
  To: Zhu, Changfeng, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Tuikov,
	Luben, Huang, Ray, Huang, Shimmer

Am 06.11.19 um 09:21 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per
> register wr->wr, wr->rd much faster than previous muticycle per
> transaction done interface.  This has caused a problem where
> status registers requiring HW to update have a 1 cycle delay, due
> to the register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers
> the use of WAIT_REG_MEM operation 1 case only.So it needs to call
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
> update firmware in case firmware is too old to have function to realize
> dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
> moved to gfxhub in gfx10. So it needs to add dummy read in driver
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..acdb0e4df9b4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +				  GRBM requires 1-cycle delay in cp firmware\n");
> +}
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr;
> @@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						  uint32_t reg0, uint32_t reg1,
> +						  uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	gfx_v10_0_check_fw_write_wait(adev);

Doing that here is invalid, the function can be called concurrently from 
multiple threads.

You either need to do this only once from some of the _init() callbacks 
or drop the adev->gfx.cp_fw_write_wait variable and instead return the 
result here.

> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				      ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe,
> @@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..22c807309a22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +						hub->vm_inv_eng0_ack + eng,
> +						req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..7b15ddc739e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +												 uint32_t reg0, uint32_t reg1,
> +												 uint32_t ref, uint32_t mask)

BTW: You seem to have some odd tab configuration in your editor.

Tabs should be 8 spaces and for function parameters the next line should 
be starting behind the "(" above.

Regards,
Christian.

> +{
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
> +}
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06  9:26     ` Koenig, Christian
  0 siblings, 0 replies; 20+ messages in thread
From: Koenig, Christian @ 2019-11-06  9:26 UTC (permalink / raw)
  To: Zhu, Changfeng, amd-gfx, Tuikov, Luben, Huang, Ray, Huang, Shimmer

Am 06.11.19 um 09:21 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per
> register wr->wr, wr->rd much faster than previous muticycle per
> transaction done interface.  This has caused a problem where
> status registers requiring HW to update have a 1 cycle delay, due
> to the register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers
> the use of WAIT_REG_MEM operation 1 case only.So it needs to call
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
> update firmware in case firmware is too old to have function to realize
> dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
> moved to gfxhub in gfx10. So it needs to add dummy read in driver
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 64 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..acdb0e4df9b4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +				  GRBM requires 1-cycle delay in cp firmware\n");
> +}
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr;
> @@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						  uint32_t reg0, uint32_t reg1,
> +						  uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	gfx_v10_0_check_fw_write_wait(adev);

Doing that here is invalid, the function can be called concurrently from 
multiple threads.

You either need to do this only once from some of the _init() callbacks 
or drop the adev->gfx.cp_fw_write_wait variable and instead return the 
result here.

> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				      ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe,
> @@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> @@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..22c807309a22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +						hub->vm_inv_eng0_ack + eng,
> +						req, 1 << vmid);
>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..7b15ddc739e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +												 uint32_t reg0, uint32_t reg1,
> +												 uint32_t ref, uint32_t mask)

BTW: You seem to have some odd tab configuration in your editor.

Tabs should be 8 spaces and for function parameters the next line should 
be starting behind the "(" above.

Regards,
Christian.

> +{
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
> +}
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06  8:21 ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06  8:21 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Koenig, Christian,
	Tuikov, Luben, Huang, Ray, Huang, Shimmer
  Cc: Zhu, Changfeng

From: changzhu <Changfeng.Zhu@amd.com>

The GRBM register interface is now capable of bursting 1 cycle per
register wr->wr, wr->rd much faster than previous muticycle per
transaction done interface.  This has caused a problem where
status registers requiring HW to update have a 1 cycle delay, due
to the register update having to go through GRBM.

For cp ucode, it has realized dummy read in cp firmware.It covers
the use of WAIT_REG_MEM operation 1 case only.So it needs to call
gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
update firmware in case firmware is too old to have function to realize
dummy read in cp firmware.

For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
moved to gfxhub in gfx10. So it needs to add dummy read in driver
between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.

Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 459aa9059542..a74ecd449775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -267,6 +267,7 @@ struct amdgpu_gfx {
 	uint32_t			mec2_feature_version;
 	bool				mec_fw_write_wait;
 	bool				me_fw_write_wait;
+	bool				cp_fw_write_wait;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 17a5cbfd0024..acdb0e4df9b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
 	kfree(adev->gfx.rlc.register_list_format);
 }
 
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+	adev->gfx.cp_fw_write_wait = false;
+
+	switch (adev->asic_type) {
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
+		if ((adev->gfx.me_fw_version >= 0x00000046) &&
+		    (adev->gfx.me_feature_version >= 27) &&
+		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
+		    (adev->gfx.pfp_feature_version >= 27) &&
+		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
+		    (adev->gfx.mec_feature_version >= 27))
+			adev->gfx.cp_fw_write_wait = true;
+		break;
+	default:
+		break;
+	}
+
+	if (adev->gfx.cp_fw_write_wait == false)
+		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+				  GRBM requires 1-cycle delay in cp firmware\n");
+}
+
+
 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 {
 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						  uint32_t reg0, uint32_t reg1,
+						  uint32_t ref, uint32_t mask)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+	struct amdgpu_device *adev = ring->adev;
+	bool fw_version_ok = false;
+
+	gfx_v10_0_check_fw_write_wait(adev);
+	fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+	if (fw_version_ok)
+		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+				      ref, mask, 0x20);
+	else
+		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+							   ref, mask);
+}
+
 static void
 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 				      uint32_t me, uint32_t pipe,
@@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_tmz = gfx_v10_0_ring_emit_tmz,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 	.emit_rreg = gfx_v10_0_ring_emit_rreg,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3b00bce14cfb..22c807309a22 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+						hub->vm_inv_eng0_ack + eng,
+						req, 1 << vmid);
 
 	return pd_addr;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 3460c00f3eaa..7b15ddc739e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+												 uint32_t reg0, uint32_t reg1,
+												 uint32_t ref, uint32_t mask)
+{
+	amdgpu_ring_emit_wreg(ring, reg0, ref);
+	/* wait for a cycle to reset vm_inv_eng*_ack */
+	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
 static int sdma_v5_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
 		/* sdma_v5_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
 		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
 	.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 	.pad_ib = sdma_v5_0_ring_pad_ib,
 	.emit_wreg = sdma_v5_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
 	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
 	.preempt_ib = sdma_v5_0_ring_preempt_ib,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06  8:21 ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06  8:21 UTC (permalink / raw)
  To: amd-gfx, Koenig,  Christian, Tuikov, Luben, Huang, Ray, Huang, Shimmer
  Cc: Zhu, Changfeng

From: changzhu <Changfeng.Zhu@amd.com>

The GRBM register interface is now capable of bursting 1 cycle per
register wr->wr, wr->rd much faster than previous muticycle per
transaction done interface.  This has caused a problem where
status registers requiring HW to update have a 1 cycle delay, due
to the register update having to go through GRBM.

For cp ucode, it has realized dummy read in cp firmware.It covers
the use of WAIT_REG_MEM operation 1 case only.So it needs to call
gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
update firmware in case firmware is too old to have function to realize
dummy read in cp firmware.

For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
moved to gfxhub in gfx10. So it needs to add dummy read in driver
between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.

Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 48 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
 4 files changed, 64 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 459aa9059542..a74ecd449775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -267,6 +267,7 @@ struct amdgpu_gfx {
 	uint32_t			mec2_feature_version;
 	bool				mec_fw_write_wait;
 	bool				me_fw_write_wait;
+	bool				cp_fw_write_wait;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 17a5cbfd0024..acdb0e4df9b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
 	kfree(adev->gfx.rlc.register_list_format);
 }
 
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+	adev->gfx.cp_fw_write_wait = false;
+
+	switch (adev->asic_type) {
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
+		if ((adev->gfx.me_fw_version >= 0x00000046) &&
+		    (adev->gfx.me_feature_version >= 27) &&
+		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
+		    (adev->gfx.pfp_feature_version >= 27) &&
+		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
+		    (adev->gfx.mec_feature_version >= 27))
+			adev->gfx.cp_fw_write_wait = true;
+		break;
+	default:
+		break;
+	}
+
+	if (adev->gfx.cp_fw_write_wait == false)
+		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+				  GRBM requires 1-cycle delay in cp firmware\n");
+}
+
+
 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 {
 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						  uint32_t reg0, uint32_t reg1,
+						  uint32_t ref, uint32_t mask)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+	struct amdgpu_device *adev = ring->adev;
+	bool fw_version_ok = false;
+
+	gfx_v10_0_check_fw_write_wait(adev);
+	fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+	if (fw_version_ok)
+		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+				      ref, mask, 0x20);
+	else
+		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+							   ref, mask);
+}
+
 static void
 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 				      uint32_t me, uint32_t pipe,
@@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_tmz = gfx_v10_0_ring_emit_tmz,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5221,6 +5268,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
 	.emit_rreg = gfx_v10_0_ring_emit_rreg,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3b00bce14cfb..22c807309a22 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+						hub->vm_inv_eng0_ack + eng,
+						req, 1 << vmid);
 
 	return pd_addr;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 3460c00f3eaa..7b15ddc739e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+												 uint32_t reg0, uint32_t reg1,
+												 uint32_t ref, uint32_t mask)
+{
+	amdgpu_ring_emit_wreg(ring, reg0, ref);
+	/* wait for a cycle to reset vm_inv_eng*_ack */
+	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
 static int sdma_v5_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
 		/* sdma_v5_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
 		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
 	.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 	.pad_ib = sdma_v5_0_ring_pad_ib,
 	.emit_wreg = sdma_v5_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
 	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
 	.preempt_ib = sdma_v5_0_ring_preempt_ib,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06  8:15         ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06  8:15 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Tuikov, Luben, Huang, Ray, Huang, Shimmer

Thanks, Chris.

I double check all engines on gfx10 implement the emit_reg_write_reg_wait callback.
You're right. I miss .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, for gfx_v10_0_ring_funcs_kiq. So I add it.

For sdma5, there is one amdgpu_ring_funs engine: amdgpu_ring_funcs sdma_v5_0_ring_funcs
We have defined:
.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,

For vcn/uvd/vce,Their engines have all been defined:
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper;
before.

So after adding .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait
for gfx_v10_0_ring_funcs_kiq.
I think there should be no NULL pointer deref problem.

BR,
Changfeng.

-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com> 
Sent: Tuesday, November 5, 2019 7:58 PM
To: Zhu, Changfeng <Changfeng.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Am 05.11.19 um 12:42 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per 
> register wr->wr, wr->rd much faster than previous muticycle per 
> transaction done interface.  This has caused a problem where status 
> registers requiring HW to update have a 1 cycle delay, due to the 
> register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers the 
> use of WAIT_REG_MEM operation 1 case only.So it needs to call 
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning 
> to update firmware in case firmware is too old to have function to 
> realize dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma 
> is moved to gfxhub in gfx10. So it needs to add dummy read in driver 
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 47 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 63 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..e82b6d796b69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) 
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +				  GRBM requires 1-cycle delay in cp firmware\n"); }
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -4768,6 +4794,25 
> @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						  uint32_t reg0, uint32_t reg1,
> +						  uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	gfx_v10_0_check_fw_write_wait(adev);
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				      ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe, @@ -5158,6 +5203,7 @@ static 
> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = 
> { @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..22c807309a22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +						hub->vm_inv_eng0_ack + eng,
> +						req, 1 << vmid);

You need to double check that all engines on gfx10 implement the emit_reg_write_reg_wait callback (probably as dummy with .emit_reg_write_reg_wait=amdgpu_ring_emit_reg_write_reg_wait_helper).

Otherwise that could quickly result in a NULL pointer deref here for the multimedia engines.

Apart from that the patch looks good to me now.

Thanks,
Christian.

>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..7b15ddc739e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +												 uint32_t reg0, uint32_t reg1,
> +												 uint32_t ref, uint32_t mask) {
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); }
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ 
> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-06  8:15         ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-06  8:15 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx, Tuikov, Luben, Huang, Ray, Huang, Shimmer

Thanks, Chris.

I double check all engines on gfx10 implement the emit_reg_write_reg_wait callback.
You're right. I miss .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, for gfx_v10_0_ring_funcs_kiq. So I add it.

For sdma5, there is one amdgpu_ring_funs engine: amdgpu_ring_funcs sdma_v5_0_ring_funcs
We have defined:
.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,

For vcn/uvd/vce,Their engines have all been defined:
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper;
before.

So after adding .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait
for gfx_v10_0_ring_funcs_kiq.
I think there should be no NULL pointer deref problem.

BR,
Changfeng.

-----Original Message-----
From: Koenig, Christian <Christian.Koenig@amd.com> 
Sent: Tuesday, November 5, 2019 7:58 PM
To: Zhu, Changfeng <Changfeng.Zhu@amd.com>; amd-gfx@lists.freedesktop.org; Tuikov, Luben <Luben.Tuikov@amd.com>; Huang, Ray <Ray.Huang@amd.com>; Huang, Shimmer <Xinmei.Huang@amd.com>
Subject: Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10

Am 05.11.19 um 12:42 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per 
> register wr->wr, wr->rd much faster than previous muticycle per 
> transaction done interface.  This has caused a problem where status 
> registers requiring HW to update have a 1 cycle delay, due to the 
> register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers the 
> use of WAIT_REG_MEM operation 1 case only.So it needs to call 
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning 
> to update firmware in case firmware is too old to have function to 
> realize dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma 
> is moved to gfxhub in gfx10. So it needs to add dummy read in driver 
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 47 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 63 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..e82b6d796b69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) 
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +				  GRBM requires 1-cycle delay in cp firmware\n"); }
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -4768,6 +4794,25 
> @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						  uint32_t reg0, uint32_t reg1,
> +						  uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	gfx_v10_0_check_fw_write_wait(adev);
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				      ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe, @@ -5158,6 +5203,7 @@ static 
> const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = 
> { @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..22c807309a22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +						hub->vm_inv_eng0_ack + eng,
> +						req, 1 << vmid);

You need to double check that all engines on gfx10 implement the emit_reg_write_reg_wait callback (probably as dummy with .emit_reg_write_reg_wait=amdgpu_ring_emit_reg_write_reg_wait_helper).

Otherwise that could quickly result in a NULL pointer deref here for the multimedia engines.

Apart from that the patch looks good to me now.

Thanks,
Christian.

>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..7b15ddc739e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +												 uint32_t reg0, uint32_t reg1,
> +												 uint32_t ref, uint32_t mask) {
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); }
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ 
> -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-05 11:58     ` Koenig, Christian
  0 siblings, 0 replies; 20+ messages in thread
From: Koenig, Christian @ 2019-11-05 11:58 UTC (permalink / raw)
  To: Zhu, Changfeng, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Tuikov,
	Luben, Huang, Ray, Huang, Shimmer

Am 05.11.19 um 12:42 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per
> register wr->wr, wr->rd much faster than previous muticycle per
> transaction done interface.  This has caused a problem where
> status registers requiring HW to update have a 1 cycle delay, due
> to the register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers
> the use of WAIT_REG_MEM operation 1 case only.So it needs to call
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
> update firmware in case firmware is too old to have function to realize
> dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
> moved to gfxhub in gfx10. So it needs to add dummy read in driver
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 47 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 63 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..e82b6d796b69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +				  GRBM requires 1-cycle delay in cp firmware\n");
> +}
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr;
> @@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						  uint32_t reg0, uint32_t reg1,
> +						  uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	gfx_v10_0_check_fw_write_wait(adev);
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				      ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe,
> @@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..22c807309a22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +						hub->vm_inv_eng0_ack + eng,
> +						req, 1 << vmid);

You need to double check that all engines on gfx10 implement the 
emit_reg_write_reg_wait callback (probably as dummy with 
.emit_reg_write_reg_wait=amdgpu_ring_emit_reg_write_reg_wait_helper).

Otherwise that could quickly result in a NULL pointer deref here for the 
multimedia engines.

Apart from that the patch looks good to me now.

Thanks,
Christian.

>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..7b15ddc739e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +												 uint32_t reg0, uint32_t reg1,
> +												 uint32_t ref, uint32_t mask)
> +{
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
> +}
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-05 11:58     ` Koenig, Christian
  0 siblings, 0 replies; 20+ messages in thread
From: Koenig, Christian @ 2019-11-05 11:58 UTC (permalink / raw)
  To: Zhu, Changfeng, amd-gfx, Tuikov, Luben, Huang, Ray, Huang, Shimmer

Am 05.11.19 um 12:42 schrieb Zhu, Changfeng:
> From: changzhu <Changfeng.Zhu@amd.com>
>
> The GRBM register interface is now capable of bursting 1 cycle per
> register wr->wr, wr->rd much faster than previous muticycle per
> transaction done interface.  This has caused a problem where
> status registers requiring HW to update have a 1 cycle delay, due
> to the register update having to go through GRBM.
>
> For cp ucode, it has realized dummy read in cp firmware.It covers
> the use of WAIT_REG_MEM operation 1 case only.So it needs to call
> gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
> update firmware in case firmware is too old to have function to realize
> dummy read in cp firmware.
>
> For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
> moved to gfxhub in gfx10. So it needs to add dummy read in driver
> between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.
>
> Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
> Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 47 +++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
>   4 files changed, 63 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 459aa9059542..a74ecd449775 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -267,6 +267,7 @@ struct amdgpu_gfx {
>   	uint32_t			mec2_feature_version;
>   	bool				mec_fw_write_wait;
>   	bool				me_fw_write_wait;
> +	bool				cp_fw_write_wait;
>   	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
>   	unsigned			num_gfx_rings;
>   	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 17a5cbfd0024..e82b6d796b69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
>   	kfree(adev->gfx.rlc.register_list_format);
>   }
>   
> +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
> +{
> +	adev->gfx.cp_fw_write_wait = false;
> +
> +	switch (adev->asic_type) {
> +	case CHIP_NAVI10:
> +	case CHIP_NAVI12:
> +	case CHIP_NAVI14:
> +		if ((adev->gfx.me_fw_version >= 0x00000046) &&
> +		    (adev->gfx.me_feature_version >= 27) &&
> +		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
> +		    (adev->gfx.pfp_feature_version >= 27) &&
> +		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
> +		    (adev->gfx.mec_feature_version >= 27))
> +			adev->gfx.cp_fw_write_wait = true;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	if (adev->gfx.cp_fw_write_wait == false)
> +		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
> +				  GRBM requires 1-cycle delay in cp firmware\n");
> +}
> +
> +
>   static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
>   {
>   	const struct rlc_firmware_header_v2_1 *rlc_hdr;
> @@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>   }
>   
> +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +						  uint32_t reg0, uint32_t reg1,
> +						  uint32_t ref, uint32_t mask)
> +{
> +	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
> +	struct amdgpu_device *adev = ring->adev;
> +	bool fw_version_ok = false;
> +
> +	gfx_v10_0_check_fw_write_wait(adev);
> +	fw_version_ok = adev->gfx.cp_fw_write_wait;
> +
> +	if (fw_version_ok)
> +		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
> +				      ref, mask, 0x20);
> +	else
> +		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
> +							   ref, mask);
> +}
> +
>   static void
>   gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>   				      uint32_t me, uint32_t pipe,
> @@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_tmz = gfx_v10_0_ring_emit_tmz,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
> @@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
>   };
>   
>   static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 3b00bce14cfb..22c807309a22 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>   	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>   			      upper_32_bits(pd_addr));
>   
> -	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
> -
> -	/* wait for the invalidate to complete */
> -	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
> -				  1 << vmid, 1 << vmid);
> +	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
> +						hub->vm_inv_eng0_ack + eng,
> +						req, 1 << vmid);

You need to double check that all engines on gfx10 implement the 
emit_reg_write_reg_wait callback (probably as dummy with 
.emit_reg_write_reg_wait=amdgpu_ring_emit_reg_write_reg_wait_helper).

Otherwise that could quickly result in a NULL pointer deref here for the 
multimedia engines.

Apart from that the patch looks good to me now.

Thanks,
Christian.

>   
>   	return pd_addr;
>   }
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 3460c00f3eaa..7b15ddc739e5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>   			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
>   }
>   
> +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
> +												 uint32_t reg0, uint32_t reg1,
> +												 uint32_t ref, uint32_t mask)
> +{
> +	amdgpu_ring_emit_wreg(ring, reg0, ref);
> +	/* wait for a cycle to reset vm_inv_eng*_ack */
> +	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
> +	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
> +}
> +
>   static int sdma_v5_0_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
>   		/* sdma_v5_0_ring_emit_vm_flush */
>   		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
> -		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
>   		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
>   	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
>   	.emit_ib = sdma_v5_0_ring_emit_ib,
> @@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
>   	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
>   	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
>   	.preempt_ib = sdma_v5_0_ring_preempt_ib,

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-05 11:42 ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-05 11:42 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Tuikov, Luben, Koenig,
	Christian, Huang, Ray, Huang, Shimmer
  Cc: Zhu, Changfeng

From: changzhu <Changfeng.Zhu@amd.com>

The GRBM register interface is now capable of bursting 1 cycle per
register wr->wr, wr->rd much faster than previous muticycle per
transaction done interface.  This has caused a problem where
status registers requiring HW to update have a 1 cycle delay, due
to the register update having to go through GRBM.

For cp ucode, it has realized dummy read in cp firmware.It covers
the use of WAIT_REG_MEM operation 1 case only.So it needs to call
gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
update firmware in case firmware is too old to have function to realize
dummy read in cp firmware.

For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
moved to gfxhub in gfx10. So it needs to add dummy read in driver
between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.

Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 47 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
 4 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 459aa9059542..a74ecd449775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -267,6 +267,7 @@ struct amdgpu_gfx {
 	uint32_t			mec2_feature_version;
 	bool				mec_fw_write_wait;
 	bool				me_fw_write_wait;
+	bool				cp_fw_write_wait;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 17a5cbfd0024..e82b6d796b69 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
 	kfree(adev->gfx.rlc.register_list_format);
 }
 
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+	adev->gfx.cp_fw_write_wait = false;
+
+	switch (adev->asic_type) {
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
+		if ((adev->gfx.me_fw_version >= 0x00000046) &&
+		    (adev->gfx.me_feature_version >= 27) &&
+		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
+		    (adev->gfx.pfp_feature_version >= 27) &&
+		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
+		    (adev->gfx.mec_feature_version >= 27))
+			adev->gfx.cp_fw_write_wait = true;
+		break;
+	default:
+		break;
+	}
+
+	if (adev->gfx.cp_fw_write_wait == false)
+		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+				  GRBM requires 1-cycle delay in cp firmware\n");
+}
+
+
 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 {
 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						  uint32_t reg0, uint32_t reg1,
+						  uint32_t ref, uint32_t mask)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+	struct amdgpu_device *adev = ring->adev;
+	bool fw_version_ok = false;
+
+	gfx_v10_0_check_fw_write_wait(adev);
+	fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+	if (fw_version_ok)
+		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+				      ref, mask, 0x20);
+	else
+		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+							   ref, mask);
+}
+
 static void
 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 				      uint32_t me, uint32_t pipe,
@@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_tmz = gfx_v10_0_ring_emit_tmz,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3b00bce14cfb..22c807309a22 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+						hub->vm_inv_eng0_ack + eng,
+						req, 1 << vmid);
 
 	return pd_addr;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 3460c00f3eaa..7b15ddc739e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+												 uint32_t reg0, uint32_t reg1,
+												 uint32_t ref, uint32_t mask)
+{
+	amdgpu_ring_emit_wreg(ring, reg0, ref);
+	/* wait for a cycle to reset vm_inv_eng*_ack */
+	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
 static int sdma_v5_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
 		/* sdma_v5_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
 		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
 	.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 	.pad_ib = sdma_v5_0_ring_pad_ib,
 	.emit_wreg = sdma_v5_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
 	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
 	.preempt_ib = sdma_v5_0_ring_preempt_ib,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10
@ 2019-11-05 11:42 ` Zhu, Changfeng
  0 siblings, 0 replies; 20+ messages in thread
From: Zhu, Changfeng @ 2019-11-05 11:42 UTC (permalink / raw)
  To: amd-gfx, Tuikov,  Luben, Koenig, Christian, Huang, Ray, Huang, Shimmer
  Cc: Zhu, Changfeng

From: changzhu <Changfeng.Zhu@amd.com>

The GRBM register interface is now capable of bursting 1 cycle per
register wr->wr, wr->rd much faster than previous muticycle per
transaction done interface.  This has caused a problem where
status registers requiring HW to update have a 1 cycle delay, due
to the register update having to go through GRBM.

For cp ucode, it has realized dummy read in cp firmware.It covers
the use of WAIT_REG_MEM operation 1 case only.So it needs to call
gfx_v10_0_wait_reg_mem in gfx10. Besides it also needs to add warning to
update firmware in case firmware is too old to have function to realize
dummy read in cp firmware.

For sdma ucode, it hasn't realized dummy read in sdma firmware. sdma is
moved to gfxhub in gfx10. So it needs to add dummy read in driver
between amdgpu_ring_emit_wreg and amdgpu_ring_emit_reg_wait for sdma_v5_0.

Change-Id: Ie028f37eb789966d4593984bd661b248ebeb1ac3
Signed-off-by: changzhu <Changfeng.Zhu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 47 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  |  8 ++---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c  | 13 ++++++-
 4 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 459aa9059542..a74ecd449775 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -267,6 +267,7 @@ struct amdgpu_gfx {
 	uint32_t			mec2_feature_version;
 	bool				mec_fw_write_wait;
 	bool				me_fw_write_wait;
+	bool				cp_fw_write_wait;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 17a5cbfd0024..e82b6d796b69 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -561,6 +561,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
 	kfree(adev->gfx.rlc.register_list_format);
 }
 
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+	adev->gfx.cp_fw_write_wait = false;
+
+	switch (adev->asic_type) {
+	case CHIP_NAVI10:
+	case CHIP_NAVI12:
+	case CHIP_NAVI14:
+		if ((adev->gfx.me_fw_version >= 0x00000046) &&
+		    (adev->gfx.me_feature_version >= 27) &&
+		    (adev->gfx.pfp_fw_version >= 0x00000068) &&
+		    (adev->gfx.pfp_feature_version >= 27) &&
+		    (adev->gfx.mec_fw_version >= 0x0000005b) &&
+		    (adev->gfx.mec_feature_version >= 27))
+			adev->gfx.cp_fw_write_wait = true;
+		break;
+	default:
+		break;
+	}
+
+	if (adev->gfx.cp_fw_write_wait == false)
+		DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+				  GRBM requires 1-cycle delay in cp firmware\n");
+}
+
+
 static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
 {
 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -4768,6 +4794,25 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 	gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
 }
 
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+						  uint32_t reg0, uint32_t reg1,
+						  uint32_t ref, uint32_t mask)
+{
+	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+	struct amdgpu_device *adev = ring->adev;
+	bool fw_version_ok = false;
+
+	gfx_v10_0_check_fw_write_wait(adev);
+	fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+	if (fw_version_ok)
+		gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+				      ref, mask, 0x20);
+	else
+		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+							   ref, mask);
+}
+
 static void
 gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 				      uint32_t me, uint32_t pipe,
@@ -5158,6 +5203,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_tmz = gfx_v10_0_ring_emit_tmz,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5191,6 +5237,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
 };
 
 static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 3b00bce14cfb..22c807309a22 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -344,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
 	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
 			      upper_32_bits(pd_addr));
 
-	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
-	/* wait for the invalidate to complete */
-	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
-				  1 << vmid, 1 << vmid);
+	amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+						hub->vm_inv_eng0_ack + eng,
+						req, 1 << vmid);
 
 	return pd_addr;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 3460c00f3eaa..7b15ddc739e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1170,6 +1170,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
 }
 
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+												 uint32_t reg0, uint32_t reg1,
+												 uint32_t ref, uint32_t mask)
+{
+	amdgpu_ring_emit_wreg(ring, reg0, ref);
+	/* wait for a cycle to reset vm_inv_eng*_ack */
+	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
 static int sdma_v5_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1585,7 +1595,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 		6 + /* sdma_v5_0_ring_emit_pipeline_sync */
 		/* sdma_v5_0_ring_emit_vm_flush */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
-		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
 		10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
 	.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
 	.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1599,6 +1609,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 	.pad_ib = sdma_v5_0_ring_pad_ib,
 	.emit_wreg = sdma_v5_0_ring_emit_wreg,
 	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
 	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
 	.preempt_ib = sdma_v5_0_ring_preempt_ib,
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2019-11-06 13:07 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-06 10:52 [PATCH 1/2] drm/amdgpu: add dummy read by engines for some GCVM status registers in gfx10 Zhu, Changfeng
2019-11-06 10:52 ` Zhu, Changfeng
     [not found] ` <20191106105136.14413-1-changfeng.zhu-5C7GfCeVMHo@public.gmane.org>
2019-11-06 12:48   ` Koenig, Christian
2019-11-06 12:48     ` Koenig, Christian
     [not found]     ` <7509a1ed-eca7-ba36-49e4-5c1f8cd17b1b-5C7GfCeVMHo@public.gmane.org>
2019-11-06 12:50       ` Zhu, Changfeng
2019-11-06 12:50         ` Zhu, Changfeng
     [not found]         ` <MN2PR12MB28961D2B01E4B8B5018CE5C2FD790-rweVpJHSKToIQ/pRnFqe/QdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2019-11-06 13:07           ` Zhu, Changfeng
2019-11-06 13:07             ` Zhu, Changfeng
  -- strict thread matches above, loose matches on Subject: below --
2019-11-06  8:21 Zhu, Changfeng
2019-11-06  8:21 ` Zhu, Changfeng
     [not found] ` <20191106082051.10338-1-changfeng.zhu-5C7GfCeVMHo@public.gmane.org>
2019-11-06  9:26   ` Koenig, Christian
2019-11-06  9:26     ` Koenig, Christian
     [not found]     ` <bcf84101-ae80-666c-18cc-abbe2173627a-5C7GfCeVMHo@public.gmane.org>
2019-11-06 10:50       ` Zhu, Changfeng
2019-11-06 10:50         ` Zhu, Changfeng
2019-11-05 11:42 Zhu, Changfeng
2019-11-05 11:42 ` Zhu, Changfeng
     [not found] ` <20191105114133.4734-1-changfeng.zhu-5C7GfCeVMHo@public.gmane.org>
2019-11-05 11:58   ` Koenig, Christian
2019-11-05 11:58     ` Koenig, Christian
     [not found]     ` <e9ed971a-eb2b-70f5-f1de-411a5629ac78-5C7GfCeVMHo@public.gmane.org>
2019-11-06  8:15       ` Zhu, Changfeng
2019-11-06  8:15         ` Zhu, Changfeng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.