All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
@ 2021-06-21  7:00 Evan Quan
  2021-06-21  7:00 ` [PATCH V3 2/7] drm/amdgpu: fix Navi1x tcp power gating hang when issuing lightweight invalidaiton Evan Quan
                   ` (6 more replies)
  0 siblings, 7 replies; 14+ messages in thread
From: Evan Quan @ 2021-06-21  7:00 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Lijo.Lazar, Evan Quan

Add missing settings for SQC bits. And correct some confusing logics
around active wgp bitmap calculation.

Change-Id: If4992e175fd61d5609b00328cbe21f487517d039
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
V1->V2:
  - restore correct tcp_harvest setting for NV10 and NV12
  - move asic type guard upper layer for better readability
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 103 ++++++++++++++-----------
 1 file changed, 57 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 15ae9e33b925..384b95fbad8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5090,47 +5090,50 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
 		4 + /* RMI */
 		1); /* SQG */
 
-	if (adev->asic_type == CHIP_NAVI10 ||
-	    adev->asic_type == CHIP_NAVI14 ||
-	    adev->asic_type == CHIP_NAVI12) {
-		mutex_lock(&adev->grbm_idx_mutex);
-		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
-			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-				gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
-				wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
-				/*
-				 * Set corresponding TCP bits for the inactive WGPs in
-				 * GCRD_SA_TARGETS_DISABLE
-				 */
-				gcrd_targets_disable_tcp = 0;
-				/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
-				utcl_invreq_disable = 0;
-
-				for (k = 0; k < max_wgp_per_sh; k++) {
-					if (!(wgp_active_bitmap & (1 << k))) {
-						gcrd_targets_disable_tcp |= 3 << (2 * k);
-						utcl_invreq_disable |= (3 << (2 * k)) |
-							(3 << (2 * (max_wgp_per_sh + k)));
-					}
+	mutex_lock(&adev->grbm_idx_mutex);
+	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+			wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
+			/*
+			 * Set corresponding TCP bits for the inactive WGPs in
+			 * GCRD_SA_TARGETS_DISABLE
+			 */
+			gcrd_targets_disable_tcp = 0;
+			/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
+			utcl_invreq_disable = 0;
+
+			for (k = 0; k < max_wgp_per_sh; k++) {
+				if (!(wgp_active_bitmap & (1 << k))) {
+					gcrd_targets_disable_tcp |= 3 << (2 * k);
+					gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2));
+					utcl_invreq_disable |= (3 << (2 * k)) |
+						(3 << (2 * (max_wgp_per_sh + k)));
 				}
-
-				tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
-				/* only override TCP & SQC bits */
-				tmp &= 0xffffffff << (4 * max_wgp_per_sh);
-				tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
-				WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
-
-				tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
-				/* only override TCP bits */
-				tmp &= 0xffffffff << (2 * max_wgp_per_sh);
-				tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
-				WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
 			}
-		}
 
-		gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-		mutex_unlock(&adev->grbm_idx_mutex);
+			tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
+			/* only override TCP & SQC bits */
+			if (adev->asic_type == CHIP_NAVI14)
+				tmp &= 0xff000000;
+			else
+				tmp &=0xfff00000;
+			tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
+			WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
+
+			tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
+			/* only override TCP & SQC bits */
+			if (adev->asic_type == CHIP_NAVI14)
+				tmp &= 0xfffc0000;
+			else
+				tmp &= 0xffff8000;
+			tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
+			WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
+		}
 	}
+
+	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	mutex_unlock(&adev->grbm_idx_mutex);
 }
 
 static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
@@ -7408,7 +7411,10 @@ static int gfx_v10_0_hw_init(void *handle)
 	 * init golden registers and rlc resume may override some registers,
 	 * reconfig them here
 	 */
-	gfx_v10_0_tcp_harvest(adev);
+	if (adev->asic_type == CHIP_NAVI10 ||
+	    adev->asic_type == CHIP_NAVI14 ||
+	    adev->asic_type == CHIP_NAVI12)
+		gfx_v10_0_tcp_harvest(adev);
 
 	r = gfx_v10_0_cp_resume(adev);
 	if (r)
@@ -9328,17 +9334,22 @@ static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *
 
 static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
 {
-	u32 data, wgp_bitmask;
-	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
-	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+	u32 disabled_mask =
+		~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+	u32 efuse_setting = 0;
+	u32 vbios_setting = 0;
+
+	efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
+	efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+	efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
 
-	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
-	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
+	vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
+	vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
+	vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
 
-	wgp_bitmask =
-		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
+	disabled_mask |= efuse_setting | vbios_setting;
 
-	return (~data) & wgp_bitmask;
+	return (~disabled_mask);
 }
 
 static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
-- 
2.29.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH V3 2/7] drm/amdgpu: fix Navi1x tcp power gating hang when issuing lightweight invalidaiton
  2021-06-21  7:00 [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Evan Quan
@ 2021-06-21  7:00 ` Evan Quan
  2021-06-21  7:00 ` [PATCH V3 3/7] drm/amdgpu: fix NAK-G generation during PCI-e link width switch Evan Quan
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Evan Quan @ 2021-06-21  7:00 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Lijo.Lazar, Evan Quan

Fix TCP hang when a lightweight invalidation happens on Navi1x.

Change-Id: I5000fefa9ec48a5e863372d298354bed1562b332
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
V1->V2:
  - Alex: use ARRAY_SIZE instead of hard code
  - limit the changes for Navi1x only
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 95 ++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 384b95fbad8b..b170d5fb8993 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7971,6 +7971,97 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
 	}
 }
 
+static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_device *adev)
+{
+	uint32_t reg_data = 0;
+	uint32_t reg_idx = 0;
+	uint32_t i;
+
+	const uint32_t tcp_ctrl_regs[] = {
+		mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP12_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP12_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP12_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP12_CU1_TCP_CTRL_REG
+	};
+
+	const uint32_t tcp_ctrl_regs_nv12[] = {
+		mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP00_CU1_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP01_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP01_CU1_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP02_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP02_CU1_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP10_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP10_CU1_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP11_CU0_TCP_CTRL_REG,
+		mmCGTS_SA0_WGP11_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP00_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP00_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP01_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP01_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP02_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP02_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP10_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP10_CU1_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP11_CU0_TCP_CTRL_REG,
+		mmCGTS_SA1_WGP11_CU1_TCP_CTRL_REG,
+	};
+
+	const uint32_t sm_ctlr_regs[] = {
+		mmCGTS_SA0_QUAD0_SM_CTRL_REG,
+		mmCGTS_SA0_QUAD1_SM_CTRL_REG,
+		mmCGTS_SA1_QUAD0_SM_CTRL_REG,
+		mmCGTS_SA1_QUAD1_SM_CTRL_REG
+	};
+
+	if (adev->asic_type == CHIP_NAVI12) {
+		for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) {
+			reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+				  tcp_ctrl_regs_nv12[i];
+			reg_data = RREG32(reg_idx);
+			reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+			WREG32(reg_idx, reg_data);
+		}
+	} else {
+		for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs); i++) {
+			reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] +
+				  tcp_ctrl_regs[i];
+			reg_data = RREG32(reg_idx);
+			reg_data |= CGTS_SA0_WGP00_CU0_TCP_CTRL_REG__TCPI_LS_OVERRIDE_MASK;
+			WREG32(reg_idx, reg_data);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(sm_ctlr_regs); i++) {
+		reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_QUAD0_SM_CTRL_REG_BASE_IDX] +
+			  sm_ctlr_regs[i];
+		reg_data = RREG32(reg_idx);
+		reg_data &= ~CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE_MASK;
+		reg_data |= 2 << CGTS_SA0_QUAD0_SM_CTRL_REG__SM_MODE__SHIFT;
+		WREG32(reg_idx, reg_data);
+	}
+}
+
 static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
 					    bool enable)
 {
@@ -7987,6 +8078,10 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
 		gfx_v10_0_update_3d_clock_gating(adev, enable);
 		/* ===  CGCG + CGLS === */
 		gfx_v10_0_update_coarse_grain_clock_gating(adev, enable);
+
+		if ((adev->asic_type >= CHIP_NAVI10) &&
+		     (adev->asic_type <= CHIP_NAVI12))
+			gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev);
 	} else {
 		/* CGCG/CGLS should be disabled before MGCG/MGLS
 		 * ===  CGCG + CGLS ===
-- 
2.29.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH V3 3/7] drm/amdgpu: fix NAK-G generation during PCI-e link width switch
  2021-06-21  7:00 [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Evan Quan
  2021-06-21  7:00 ` [PATCH V3 2/7] drm/amdgpu: fix Navi1x tcp power gating hang when issuing lightweight invalidaiton Evan Quan
@ 2021-06-21  7:00 ` Evan Quan
  2021-06-21  7:00 ` [PATCH V3 4/7] drm/amdgpu: fix the hang caused by PCIe " Evan Quan
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Evan Quan @ 2021-06-21  7:00 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Lijo.Lazar, Evan Quan

A lot of NAK-G being generated when link widht switching is happening.
WA for this issue is to program the SPC to 4 symbols per clock during
bootup when the native PCIE width is x4.

Change-Id: I7a4d751e44bddc4bd1e97860cb4f53dfadc02a2c
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
V1->V2:
 - move the code to nbio_v2_3.c as that's where the pcie related changes
   reside
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h |  1 +
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c   | 28 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/nv.c          |  3 +++
 3 files changed, 32 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 25ee53545837..43d074bb00a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -93,6 +93,7 @@ struct amdgpu_nbio_funcs {
 	void (*enable_aspm)(struct amdgpu_device *adev,
 			    bool enable);
 	void (*program_aspm)(struct amdgpu_device *adev);
+	void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_nbio {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 05ddec7ba7e2..315d57bb373d 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -51,6 +51,8 @@
 #define mmBIF_MMSCH1_DOORBELL_RANGE		0x01d8
 #define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX	2
 
+#define smnPCIE_LC_LINK_WIDTH_CNTL		0x11140288
+
 static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev)
 {
 	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -463,6 +465,31 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev)
 		WREG32_PCIE(smnPCIE_LC_CNTL3, data);
 }
 
+static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
+{
+	uint32_t reg_data = 0;
+	uint32_t link_width = 0;
+
+	if (!((adev->asic_type >= CHIP_NAVI10) &&
+	     (adev->asic_type <= CHIP_NAVI12)))
+		return;
+
+	reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
+	link_width = (reg_data & PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
+		>> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
+
+	/*
+	 * Program PCIE_LC_CNTL6.LC_SPC_MODE_8GT to 0x2 (4 symbols per clock data)
+	 * if link_width is 0x3 (x4)
+	 */
+	if (0x3 == link_width) {
+		reg_data = RREG32_PCIE(smnPCIE_LC_CNTL6);
+		reg_data &= ~PCIE_LC_CNTL6__LC_SPC_MODE_8GT_MASK;
+		reg_data |= (0x2 << PCIE_LC_CNTL6__LC_SPC_MODE_8GT__SHIFT);
+		WREG32_PCIE(smnPCIE_LC_CNTL6, reg_data);
+	}
+}
+
 const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
 	.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
 	.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@@ -484,4 +511,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
 	.remap_hdp_registers = nbio_v2_3_remap_hdp_registers,
 	.enable_aspm = nbio_v2_3_enable_aspm,
 	.program_aspm =  nbio_v2_3_program_aspm,
+	.apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 455d0425787c..63c96ca8d2a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -1411,6 +1411,9 @@ static int nv_common_hw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->nbio.funcs->apply_lc_spc_mode_wa)
+		adev->nbio.funcs->apply_lc_spc_mode_wa(adev);
+
 	/* enable pcie gen2/3 link */
 	nv_pcie_gen3_enable(adev);
 	/* enable aspm */
-- 
2.29.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH V3 4/7] drm/amdgpu: fix the hang caused by PCIe link width switch
  2021-06-21  7:00 [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Evan Quan
  2021-06-21  7:00 ` [PATCH V3 2/7] drm/amdgpu: fix Navi1x tcp power gating hang when issuing lightweight invalidaiton Evan Quan
  2021-06-21  7:00 ` [PATCH V3 3/7] drm/amdgpu: fix NAK-G generation during PCI-e link width switch Evan Quan
@ 2021-06-21  7:00 ` Evan Quan
  2021-06-21  7:00 ` [PATCH V3 5/7] drm/amdgpu: correct clock gating settings on feature unsupported Evan Quan
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Evan Quan @ 2021-06-21  7:00 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Lijo.Lazar, Evan Quan

SMU had set all the necessary fields for a link width switch
but the width switch wasn't occurring because the link was idle
in the L1 state. Setting LC_L1_RECONFIG_EN=0x1 will allow width
switches to also be initiated while in L1 instead of waiting until
the link is back in L0.

Change-Id: I6315681f6fb194036b20991512dd88fa65bc0d56
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
V1->V2:
  - limit the change for Navi10 only
V2->V3:
  - move the code to nbio_v2_3.c
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h |  1 +
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c   | 13 +++++++++++++
 drivers/gpu/drm/amd/amdgpu/nv.c          |  3 +++
 3 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 43d074bb00a1..45295dce5c3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -94,6 +94,7 @@ struct amdgpu_nbio_funcs {
 			    bool enable);
 	void (*program_aspm)(struct amdgpu_device *adev);
 	void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
+	void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_nbio {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 315d57bb373d..754b11dea6f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -490,6 +490,18 @@ static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev)
 	}
 }
 
+static void nbio_v2_3_apply_l1_link_width_reconfig_wa(struct amdgpu_device *adev)
+{
+	uint32_t reg_data = 0;
+
+	if (adev->asic_type != CHIP_NAVI10)
+		return;
+
+	reg_data = RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL);
+	reg_data |= PCIE_LC_LINK_WIDTH_CNTL__LC_L1_RECONFIG_EN_MASK;
+	WREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL, reg_data);
+}
+
 const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
 	.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
 	.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@@ -512,4 +524,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
 	.enable_aspm = nbio_v2_3_enable_aspm,
 	.program_aspm =  nbio_v2_3_program_aspm,
 	.apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
+	.apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 63c96ca8d2a2..5231b3402990 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -1414,6 +1414,9 @@ static int nv_common_hw_init(void *handle)
 	if (adev->nbio.funcs->apply_lc_spc_mode_wa)
 		adev->nbio.funcs->apply_lc_spc_mode_wa(adev);
 
+	if (adev->nbio.funcs->apply_l1_link_width_reconfig_wa)
+		adev->nbio.funcs->apply_l1_link_width_reconfig_wa(adev);
+
 	/* enable pcie gen2/3 link */
 	nv_pcie_gen3_enable(adev);
 	/* enable aspm */
-- 
2.29.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH V3 5/7] drm/amdgpu: correct clock gating settings on feature unsupported
  2021-06-21  7:00 [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Evan Quan
                   ` (2 preceding siblings ...)
  2021-06-21  7:00 ` [PATCH V3 4/7] drm/amdgpu: fix the hang caused by PCIe " Evan Quan
@ 2021-06-21  7:00 ` Evan Quan
  2021-06-21  7:00 ` [PATCH V3 6/7] drm/amdgpu: update GFX MGCG settings Evan Quan
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 14+ messages in thread
From: Evan Quan @ 2021-06-21  7:00 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Lijo.Lazar, Evan Quan

Clock gating setting is still performed even when the corresponding
CG feature is not supported. And the tricky part is disablement is
actually performed no matter for enablement or disablement request.
That seems not logically right.
Considering HW should already properly take care of the CG state, we
will just skip the corresponding clock gating setting when the feature
is not supported.

Change-Id: Ic0995cf3de9f36b59316a90a28b7c95a08f4dccd
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/athub_v2_0.c  | 12 +++--
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 69 ++++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c  | 10 +++-
 drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c   | 10 +++-
 drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c |  5 +-
 5 files changed, 83 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
index 5b90efd6f6d0..3ac505d954c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -36,9 +36,12 @@ athub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
 {
 	uint32_t def, data;
 
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+		return;
+
 	def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
 
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+	if (enable)
 		data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
 	else
 		data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
@@ -53,10 +56,13 @@ athub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
 {
 	uint32_t def, data;
 
+	if (!((adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
+	       (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)))
+		return;
+
 	def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
 
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) &&
-	    (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+	if (enable)
 		data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
 	else
 		data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index b170d5fb8993..5d0062f820c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7787,8 +7787,11 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
 {
 	uint32_t data, def;
 
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG))
+		return;
+
 	/* It is disabled by HW by default */
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+	if (enable) {
 		/* 0 - Disable some blocks' MGCG */
 		WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
 		WREG32_SOC15(GC, 0, mmCGTT_WD_CLK_CTRL, 0xff000000);
@@ -7855,22 +7858,34 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
 {
 	uint32_t data, def;
 
+	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)))
+		return;
+
 	/* Enable 3D CGCG/CGLS */
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+	if (enable) {
 		/* write cmd to clear cgcg/cgls ov */
 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
 		/* unset CGCG override */
-		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
+
 		/* update CGCG and CGLS override bits */
 		if (def != data)
 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
+
 		/* enable 3Dcgcg FSM(0x0000363f) */
 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
-		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
-			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+		data = 0;
+
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
 		if (def != data)
 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
 
@@ -7883,9 +7898,14 @@ static void gfx_v10_0_update_3d_clock_gating(struct amdgpu_device *adev,
 	} else {
 		/* Disable CGCG/CGLS */
 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
+
 		/* disable cgcg, cgls should be disabled */
-		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
-			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
+			data &= ~RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
+
 		/* disable cgcg and cgls in FSM */
 		if (def != data)
 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
@@ -7897,25 +7917,35 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
 {
 	uint32_t def, data;
 
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
+	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)))
+		return;
+
+	if (enable) {
 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
+
 		/* unset CGCG override */
-		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
+
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
-		else
-			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
+
 		/* update CGCG and CGLS override bits */
 		if (def != data)
 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 
 		/* enable cgcg FSM(0x0000363F) */
 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
-		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
-			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+		data = 0;
+
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+			data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+				RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
 		if (def != data)
 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
 
@@ -7927,8 +7957,14 @@ static void gfx_v10_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade
 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
 	} else {
 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
+
 		/* reset CGCG/CGLS bits */
-		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
+			data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
+
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
+			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+
 		/* disable cgcg and cgls in FSM */
 		if (def != data)
 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
@@ -7940,7 +7976,10 @@ static void gfx_v10_0_update_fine_grain_clock_gating(struct amdgpu_device *adev,
 {
 	uint32_t def, data;
 
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG)) {
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
+		return;
+
+	if (enable) {
 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		/* unset FGCG override */
 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index f7e93bbc4e15..7ded6b2f058e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -568,6 +568,9 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
 {
 	uint32_t def, data, def1, data1;
 
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
+		return;
+
 	switch (adev->asic_type) {
 	case CHIP_SIENNA_CICHLID:
 	case CHIP_NAVY_FLOUNDER:
@@ -582,7 +585,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
 		break;
 	}
 
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) {
+	if (enable) {
 		data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
 
 		data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK |
@@ -627,6 +630,9 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
 {
 	uint32_t def, data;
 
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+		return;
+
 	switch (adev->asic_type) {
 	case CHIP_SIENNA_CICHLID:
 	case CHIP_NAVY_FLOUNDER:
@@ -639,7 +645,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
 		break;
 	}
 
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
+	if (enable)
 		data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
 	else
 		data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index 754b11dea6f0..7b79eeaa88aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -220,8 +220,11 @@ static void nbio_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *ade
 {
 	uint32_t def, data;
 
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG))
+		return;
+
 	def = data = RREG32_PCIE(smnCPM_CONTROL);
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) {
+	if (enable) {
 		data |= (CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK |
 			 CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK |
 			 CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK |
@@ -246,8 +249,11 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
 {
 	uint32_t def, data;
 
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS))
+		return;
+
 	def = data = RREG32_PCIE(smnPCIE_CNTL2);
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
+	if (enable) {
 		data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK |
 			 PCIE_CNTL2__MST_MEM_LS_EN_MASK |
 			 PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK);
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
index e9c474c217ec..b6f1322f908c 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
@@ -43,9 +43,12 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
 	if (adev->flags & AMD_IS_APU)
 		return;
 
+	if (!(adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+		return;
+
 	def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0);
 
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG))
+	if (enable)
 		data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK |
 			CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK);
 	else
-- 
2.29.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH V3 6/7] drm/amdgpu: update GFX MGCG settings
  2021-06-21  7:00 [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Evan Quan
                   ` (3 preceding siblings ...)
  2021-06-21  7:00 ` [PATCH V3 5/7] drm/amdgpu: correct clock gating settings on feature unsupported Evan Quan
@ 2021-06-21  7:00 ` Evan Quan
  2021-06-21  7:00 ` [PATCH V3 7/7] drm/amdgpu: update HDP LS settings Evan Quan
  2021-06-21 12:07 ` [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Lazar, Lijo
  6 siblings, 0 replies; 14+ messages in thread
From: Evan Quan @ 2021-06-21  7:00 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Lijo.Lazar, Evan Quan

Update GFX MGCG related settings.

Change-Id: I0b7b8e7c97859f99db5f52026abbb4d226c179df
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 5d0062f820c1..b855a7c5bc4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7787,11 +7787,11 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
 {
 	uint32_t data, def;
 
-	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG))
+	if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
 		return;
 
 	/* It is disabled by HW by default */
-	if (enable) {
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
 		/* 0 - Disable some blocks' MGCG */
 		WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
 		WREG32_SOC15(GC, 0, mmCGTT_WD_CLK_CTRL, 0xff000000);
@@ -7804,6 +7804,7 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
 			  RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
 			  RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
 
 		if (def != data)
@@ -7826,13 +7827,15 @@ static void gfx_v10_0_update_medium_grain_clock_gating(struct amdgpu_device *ade
 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
 			}
 		}
-	} else {
+	} else if (!enable || !(adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
 		/* 1 - MGCG_OVERRIDE */
 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
-			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
+			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK |
+			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK |
+			 RLC_CGTT_MGCG_OVERRIDE__ENABLE_CGTS_LEGACY_MASK);
 		if (def != data)
 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
 
-- 
2.29.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH V3 7/7] drm/amdgpu: update HDP LS settings
  2021-06-21  7:00 [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Evan Quan
                   ` (4 preceding siblings ...)
  2021-06-21  7:00 ` [PATCH V3 6/7] drm/amdgpu: update GFX MGCG settings Evan Quan
@ 2021-06-21  7:00 ` Evan Quan
  2021-06-21 12:07 ` [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Lazar, Lijo
  6 siblings, 0 replies; 14+ messages in thread
From: Evan Quan @ 2021-06-21  7:00 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alexander.Deucher, Lijo.Lazar, Evan Quan

Avoid unnecessary register programming on feature disablement.

Change-Id: Ia8ad4fb28cb23f80ddcf1399eace284e4d33bd90
Signed-off-by: Evan Quan <evan.quan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 85 +++++++++++++++------------
 1 file changed, 48 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
index 7a15e669b68d..5793977953cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
@@ -90,45 +90,56 @@ static void hdp_v5_0_update_mem_power_gating(struct amdgpu_device *adev,
 					 RC_MEM_POWER_SD_EN, 0);
 	WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
 
-	/* only one clock gating mode (LS/DS/SD) can be enabled */
-	if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
-		hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
-						 HDP_MEM_POWER_CTRL,
-						 IPH_MEM_POWER_LS_EN, enable);
-		hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
-						 HDP_MEM_POWER_CTRL,
-						 RC_MEM_POWER_LS_EN, enable);
-	} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
-		hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
-						 HDP_MEM_POWER_CTRL,
-						 IPH_MEM_POWER_DS_EN, enable);
-		hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
-						 HDP_MEM_POWER_CTRL,
-						 RC_MEM_POWER_DS_EN, enable);
-	} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
-		hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
-						 HDP_MEM_POWER_CTRL,
-						 IPH_MEM_POWER_SD_EN, enable);
-		/* RC should not use shut down mode, fallback to ds */
-		hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
-						 HDP_MEM_POWER_CTRL,
-						 RC_MEM_POWER_DS_EN, enable);
-	}
-
-	/* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
-	 * be set for SRAM LS/DS/SD */
-	if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
-			      AMD_CG_SUPPORT_HDP_SD)) {
-		hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
-						 IPH_MEM_POWER_CTRL_EN, 1);
-		hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
-						 RC_MEM_POWER_CTRL_EN, 1);
+	/* Already disabled above. The actions below are for "enabled" only */
+	if (enable) {
+		/* only one clock gating mode (LS/DS/SD) can be enabled */
+		if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 IPH_MEM_POWER_LS_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_LS_EN, 1);
+		} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 IPH_MEM_POWER_DS_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_DS_EN, 1);
+		} else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+							 HDP_MEM_POWER_CTRL,
+							 IPH_MEM_POWER_SD_EN, 1);
+			/* RC should not use shut down mode, fallback to ds  or ls if allowed */
+			if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS)
+				hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+								 HDP_MEM_POWER_CTRL,
+								 RC_MEM_POWER_DS_EN, 1);
+			else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)
+				hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl,
+								 HDP_MEM_POWER_CTRL,
+								 RC_MEM_POWER_LS_EN, 1);
+		}
+
+		/* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to
+		 * be set for SRAM LS/DS/SD */
+		if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS |
+				      AMD_CG_SUPPORT_HDP_SD)) {
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+							 IPH_MEM_POWER_CTRL_EN, 1);
+			hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL,
+							 RC_MEM_POWER_CTRL_EN, 1);
+			WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
+		}
 	}
 
-	WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl);
-
-	/* restore IPH & RC clock override after clock/power mode changing */
-	WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1);
+	/* disable IPH & RC clock override after clock/power mode changing */
+	hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+				     IPH_MEM_CLK_SOFT_OVERRIDE, 0);
+	hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL,
+				     RC_MEM_CLK_SOFT_OVERRIDE, 0);
+	WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl);
 }
 
 static void hdp_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
-- 
2.29.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
  2021-06-21  7:00 [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Evan Quan
                   ` (5 preceding siblings ...)
  2021-06-21  7:00 ` [PATCH V3 7/7] drm/amdgpu: update HDP LS settings Evan Quan
@ 2021-06-21 12:07 ` Lazar, Lijo
  2021-06-22  2:25   ` Quan, Evan
  6 siblings, 1 reply; 14+ messages in thread
From: Lazar, Lijo @ 2021-06-21 12:07 UTC (permalink / raw)
  To: Evan Quan, amd-gfx; +Cc: Alexander.Deucher

One minor comment below, apart from that the series looks good.

Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>

On 6/21/2021 12:30 PM, Evan Quan wrote:
> Add missing settings for SQC bits. And correct some confusing logics
> around active wgp bitmap calculation.
> 
> Change-Id: If4992e175fd61d5609b00328cbe21f487517d039
> Signed-off-by: Evan Quan <evan.quan@amd.com>
> ---
> V1->V2:
>    - restore correct tcp_harvest setting for NV10 and NV12
>    - move asic type guard upper layer for better readability
> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 103 ++++++++++++++-----------
>   1 file changed, 57 insertions(+), 46 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 15ae9e33b925..384b95fbad8b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -5090,47 +5090,50 @@ static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
>   		4 + /* RMI */
>   		1); /* SQG */
>   
> -	if (adev->asic_type == CHIP_NAVI10 ||
> -	    adev->asic_type == CHIP_NAVI14 ||
> -	    adev->asic_type == CHIP_NAVI12) {
> -		mutex_lock(&adev->grbm_idx_mutex);
> -		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> -			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> -				gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> -				wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> -				/*
> -				 * Set corresponding TCP bits for the inactive WGPs in
> -				 * GCRD_SA_TARGETS_DISABLE
> -				 */
> -				gcrd_targets_disable_tcp = 0;
> -				/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
> -				utcl_invreq_disable = 0;
> -
> -				for (k = 0; k < max_wgp_per_sh; k++) {
> -					if (!(wgp_active_bitmap & (1 << k))) {
> -						gcrd_targets_disable_tcp |= 3 << (2 * k);
> -						utcl_invreq_disable |= (3 << (2 * k)) |
> -							(3 << (2 * (max_wgp_per_sh + k)));
> -					}
> +	mutex_lock(&adev->grbm_idx_mutex);
> +	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> +		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> +			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> +			wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> +			/*
> +			 * Set corresponding TCP bits for the inactive WGPs in
> +			 * GCRD_SA_TARGETS_DISABLE
> +			 */
> +			gcrd_targets_disable_tcp = 0;
> +			/* Set TCP & SQC bits in UTCL1_UTCL0_INVREQ_DISABLE */
> +			utcl_invreq_disable = 0;
> +
> +			for (k = 0; k < max_wgp_per_sh; k++) {
> +				if (!(wgp_active_bitmap & (1 << k))) {
> +					gcrd_targets_disable_tcp |= 3 << (2 * k);
> +					gcrd_targets_disable_tcp |= 1 << (k + (max_wgp_per_sh * 2));
> +					utcl_invreq_disable |= (3 << (2 * k)) |
> +						(3 << (2 * (max_wgp_per_sh + k)));
>   				}
> -
> -				tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
> -				/* only override TCP & SQC bits */
> -				tmp &= 0xffffffff << (4 * max_wgp_per_sh);
> -				tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
> -				WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> -
> -				tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
> -				/* only override TCP bits */
> -				tmp &= 0xffffffff << (2 * max_wgp_per_sh);
> -				tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
> -				WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
>   			}
> -		}
>   
> -		gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> -		mutex_unlock(&adev->grbm_idx_mutex);
> +			tmp = RREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE);
> +			/* only override TCP & SQC bits */
> +			if (adev->asic_type == CHIP_NAVI14)
> +				tmp &= 0xff000000;
> +			else
> +				tmp &=0xfff00000;

For the disable field mask calculation (which is the value that is 
applied finally), there is no ASIC check. The above code may utilize the 
same method as in the original code without ASIC check.

tmp &= 0xffffffff << (4 * max_wgp_per_sh);

Same for below case also - 3*max_wgp_per_sh.

Thanks,
Lijo

> +			tmp |= (utcl_invreq_disable & utcl_invreq_disable_mask);
> +			WREG32_SOC15(GC, 0, mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> +
> +			tmp = RREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE);
> +			/* only override TCP & SQC bits */
> +			if (adev->asic_type == CHIP_NAVI14)
> +				tmp &= 0xfffc0000;
> +			else
> +				tmp &= 0xffff8000;
> +			tmp |= (gcrd_targets_disable_tcp & gcrd_targets_disable_mask);
> +			WREG32_SOC15(GC, 0, mmGCRD_SA_TARGETS_DISABLE, tmp);
> +		}
>   	}
> +
> +	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> +	mutex_unlock(&adev->grbm_idx_mutex);
>   }
>   
>   static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev)
> @@ -7408,7 +7411,10 @@ static int gfx_v10_0_hw_init(void *handle)
>   	 * init golden registers and rlc resume may override some registers,
>   	 * reconfig them here
>   	 */
> -	gfx_v10_0_tcp_harvest(adev);
> +	if (adev->asic_type == CHIP_NAVI10 ||
> +	    adev->asic_type == CHIP_NAVI14 ||
> +	    adev->asic_type == CHIP_NAVI12)
> +		gfx_v10_0_tcp_harvest(adev);
>   
>   	r = gfx_v10_0_cp_resume(adev);
>   	if (r)
> @@ -9328,17 +9334,22 @@ static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *
>   
>   static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev)
>   {
> -	u32 data, wgp_bitmask;
> -	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
> -	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
> +	u32 disabled_mask =
> +		~amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
> +	u32 efuse_setting = 0;
> +	u32 vbios_setting = 0;
> +
> +	efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
> +	efuse_setting &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> +	efuse_setting >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
>   
> -	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> -	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> +	vbios_setting = RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
> +	vbios_setting &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> +	vbios_setting >>= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
>   
> -	wgp_bitmask =
> -		amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh >> 1);
> +	disabled_mask |= efuse_setting | vbios_setting;
>   
> -	return (~data) & wgp_bitmask;
> +	return (~disabled_mask);
>   }
>   
>   static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev)
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
  2021-06-21 12:07 ` [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Lazar, Lijo
@ 2021-06-22  2:25   ` Quan, Evan
  2021-06-22  6:08     ` Lazar, Lijo
  0 siblings, 1 reply; 14+ messages in thread
From: Quan, Evan @ 2021-06-22  2:25 UTC (permalink / raw)
  To: Lazar, Lijo, amd-gfx; +Cc: Deucher, Alexander

[AMD Official Use Only]

Thanks Lijo.
However, I'm not quite sure whether " 0xffffffff << (4 * max_wgp_per_sh);" is a valid expression since it kind of triggers some overflow.
Can that work for non-x86 platform or even work reliably for x86 platform?

BR
Evan
> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@amd.com>
> Sent: Monday, June 21, 2021 8:08 PM
> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
> 
> One minor comment below, apart from that the series looks good.
> 
> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
> 
> On 6/21/2021 12:30 PM, Evan Quan wrote:
> > Add missing settings for SQC bits. And correct some confusing logics
> > around active wgp bitmap calculation.
> >
> > Change-Id: If4992e175fd61d5609b00328cbe21f487517d039
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > ---
> > V1->V2:
> >    - restore correct tcp_harvest setting for NV10 and NV12
> >    - move asic type guard upper layer for better readability
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 103 ++++++++++++++------
> -----
> >   1 file changed, 57 insertions(+), 46 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 15ae9e33b925..384b95fbad8b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -5090,47 +5090,50 @@ static void gfx_v10_0_tcp_harvest(struct
> amdgpu_device *adev)
> >   		4 + /* RMI */
> >   		1); /* SQG */
> >
> > -	if (adev->asic_type == CHIP_NAVI10 ||
> > -	    adev->asic_type == CHIP_NAVI14 ||
> > -	    adev->asic_type == CHIP_NAVI12) {
> > -		mutex_lock(&adev->grbm_idx_mutex);
> > -		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> > -			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> > -				gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> > -				wgp_active_bitmap =
> gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> > -				/*
> > -				 * Set corresponding TCP bits for the inactive
> WGPs in
> > -				 * GCRD_SA_TARGETS_DISABLE
> > -				 */
> > -				gcrd_targets_disable_tcp = 0;
> > -				/* Set TCP & SQC bits in
> UTCL1_UTCL0_INVREQ_DISABLE */
> > -				utcl_invreq_disable = 0;
> > -
> > -				for (k = 0; k < max_wgp_per_sh; k++) {
> > -					if (!(wgp_active_bitmap & (1 << k))) {
> > -						gcrd_targets_disable_tcp |=
> 3 << (2 * k);
> > -						utcl_invreq_disable |= (3 <<
> (2 * k)) |
> > -							(3 << (2 *
> (max_wgp_per_sh + k)));
> > -					}
> > +	mutex_lock(&adev->grbm_idx_mutex);
> > +	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> > +		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> > +			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> > +			wgp_active_bitmap =
> gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> > +			/*
> > +			 * Set corresponding TCP bits for the inactive WGPs in
> > +			 * GCRD_SA_TARGETS_DISABLE
> > +			 */
> > +			gcrd_targets_disable_tcp = 0;
> > +			/* Set TCP & SQC bits in
> UTCL1_UTCL0_INVREQ_DISABLE */
> > +			utcl_invreq_disable = 0;
> > +
> > +			for (k = 0; k < max_wgp_per_sh; k++) {
> > +				if (!(wgp_active_bitmap & (1 << k))) {
> > +					gcrd_targets_disable_tcp |= 3 << (2 *
> k);
> > +					gcrd_targets_disable_tcp |= 1 << (k +
> (max_wgp_per_sh * 2));
> > +					utcl_invreq_disable |= (3 << (2 * k)) |
> > +						(3 << (2 * (max_wgp_per_sh
> + k)));
> >   				}
> > -
> > -				tmp = RREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE);
> > -				/* only override TCP & SQC bits */
> > -				tmp &= 0xffffffff << (4 * max_wgp_per_sh);
> > -				tmp |= (utcl_invreq_disable &
> utcl_invreq_disable_mask);
> > -				WREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> > -
> > -				tmp = RREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE);
> > -				/* only override TCP bits */
> > -				tmp &= 0xffffffff << (2 * max_wgp_per_sh);
> > -				tmp |= (gcrd_targets_disable_tcp &
> gcrd_targets_disable_mask);
> > -				WREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE, tmp);
> >   			}
> > -		}
> >
> > -		gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> > -		mutex_unlock(&adev->grbm_idx_mutex);
> > +			tmp = RREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE);
> > +			/* only override TCP & SQC bits */
> > +			if (adev->asic_type == CHIP_NAVI14)
> > +				tmp &= 0xff000000;
> > +			else
> > +				tmp &=0xfff00000;
> 
> For the disable field mask calculation (which is the value that is applied finally),
> there is no ASIC check. The above code may utilize the same method as in
> the original code without ASIC check.
> 
> tmp &= 0xffffffff << (4 * max_wgp_per_sh);
> 
> Same for below case also - 3*max_wgp_per_sh.
> 
> Thanks,
> Lijo
> 
> > +			tmp |= (utcl_invreq_disable &
> utcl_invreq_disable_mask);
> > +			WREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> > +
> > +			tmp = RREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE);
> > +			/* only override TCP & SQC bits */
> > +			if (adev->asic_type == CHIP_NAVI14)
> > +				tmp &= 0xfffc0000;
> > +			else
> > +				tmp &= 0xffff8000;
> > +			tmp |= (gcrd_targets_disable_tcp &
> gcrd_targets_disable_mask);
> > +			WREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE, tmp);
> > +		}
> >   	}
> > +
> > +	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> > +	mutex_unlock(&adev->grbm_idx_mutex);
> >   }
> >
> >   static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) @@
> > -7408,7 +7411,10 @@ static int gfx_v10_0_hw_init(void *handle)
> >   	 * init golden registers and rlc resume may override some registers,
> >   	 * reconfig them here
> >   	 */
> > -	gfx_v10_0_tcp_harvest(adev);
> > +	if (adev->asic_type == CHIP_NAVI10 ||
> > +	    adev->asic_type == CHIP_NAVI14 ||
> > +	    adev->asic_type == CHIP_NAVI12)
> > +		gfx_v10_0_tcp_harvest(adev);
> >
> >   	r = gfx_v10_0_cp_resume(adev);
> >   	if (r)
> > @@ -9328,17 +9334,22 @@ static void
> > gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device
> *
> >
> >   static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct
> amdgpu_device *adev)
> >   {
> > -	u32 data, wgp_bitmask;
> > -	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
> > -	data |= RREG32_SOC15(GC, 0,
> mmGC_USER_SHADER_ARRAY_CONFIG);
> > +	u32 disabled_mask =
> > +		~amdgpu_gfx_create_bitmask(adev-
> >gfx.config.max_cu_per_sh >> 1);
> > +	u32 efuse_setting = 0;
> > +	u32 vbios_setting = 0;
> > +
> > +	efuse_setting = RREG32_SOC15(GC, 0,
> mmCC_GC_SHADER_ARRAY_CONFIG);
> > +	efuse_setting &=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > +	efuse_setting >>=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> >
> > -	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > -	data >>=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> > +	vbios_setting = RREG32_SOC15(GC, 0,
> mmGC_USER_SHADER_ARRAY_CONFIG);
> > +	vbios_setting &=
> GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > +	vbios_setting >>=
> GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> >
> > -	wgp_bitmask =
> > -		amdgpu_gfx_create_bitmask(adev-
> >gfx.config.max_cu_per_sh >> 1);
> > +	disabled_mask |= efuse_setting | vbios_setting;
> >
> > -	return (~data) & wgp_bitmask;
> > +	return (~disabled_mask);
> >   }
> >
> >   static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct
> > amdgpu_device *adev)
> >
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
  2021-06-22  2:25   ` Quan, Evan
@ 2021-06-22  6:08     ` Lazar, Lijo
  2021-06-22  9:19       ` Michel Dänzer
  0 siblings, 1 reply; 14+ messages in thread
From: Lazar, Lijo @ 2021-06-22  6:08 UTC (permalink / raw)
  To: Quan, Evan, amd-gfx; +Cc: Deucher, Alexander

[Public]

AFAIK, that expression is legal (some code analyzer may warn on value of 4*max_wgp_per_sh); similar kind is used in rotate shift operations.

Thanks,
Lijo

-----Original Message-----
From: Quan, Evan <Evan.Quan@amd.com> 
Sent: Tuesday, June 22, 2021 7:56 AM
To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
Subject: RE: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting

[AMD Official Use Only]

Thanks Lijo.
However, I'm not quite sure whether " 0xffffffff << (4 * max_wgp_per_sh);" is a valid expression since it kind of triggers some overflow.
Can that work for non-x86 platform or even work reliably for x86 platform?

BR
Evan
> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@amd.com>
> Sent: Monday, June 21, 2021 8:08 PM
> To: Quan, Evan <Evan.Quan@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
> 
> One minor comment below, apart from that the series looks good.
> 
> Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
> 
> On 6/21/2021 12:30 PM, Evan Quan wrote:
> > Add missing settings for SQC bits. And correct some confusing logics 
> > around active wgp bitmap calculation.
> >
> > Change-Id: If4992e175fd61d5609b00328cbe21f487517d039
> > Signed-off-by: Evan Quan <evan.quan@amd.com>
> > ---
> > V1->V2:
> >    - restore correct tcp_harvest setting for NV10 and NV12
> >    - move asic type guard upper layer for better readability
> > ---
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 103 ++++++++++++++------
> -----
> >   1 file changed, 57 insertions(+), 46 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 15ae9e33b925..384b95fbad8b 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -5090,47 +5090,50 @@ static void gfx_v10_0_tcp_harvest(struct
> amdgpu_device *adev)
> >   		4 + /* RMI */
> >   		1); /* SQG */
> >
> > -	if (adev->asic_type == CHIP_NAVI10 ||
> > -	    adev->asic_type == CHIP_NAVI14 ||
> > -	    adev->asic_type == CHIP_NAVI12) {
> > -		mutex_lock(&adev->grbm_idx_mutex);
> > -		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> > -			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> > -				gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> > -				wgp_active_bitmap =
> gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> > -				/*
> > -				 * Set corresponding TCP bits for the inactive
> WGPs in
> > -				 * GCRD_SA_TARGETS_DISABLE
> > -				 */
> > -				gcrd_targets_disable_tcp = 0;
> > -				/* Set TCP & SQC bits in
> UTCL1_UTCL0_INVREQ_DISABLE */
> > -				utcl_invreq_disable = 0;
> > -
> > -				for (k = 0; k < max_wgp_per_sh; k++) {
> > -					if (!(wgp_active_bitmap & (1 << k))) {
> > -						gcrd_targets_disable_tcp |=
> 3 << (2 * k);
> > -						utcl_invreq_disable |= (3 <<
> (2 * k)) |
> > -							(3 << (2 *
> (max_wgp_per_sh + k)));
> > -					}
> > +	mutex_lock(&adev->grbm_idx_mutex);
> > +	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
> > +		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
> > +			gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
> > +			wgp_active_bitmap =
> gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
> > +			/*
> > +			 * Set corresponding TCP bits for the inactive WGPs in
> > +			 * GCRD_SA_TARGETS_DISABLE
> > +			 */
> > +			gcrd_targets_disable_tcp = 0;
> > +			/* Set TCP & SQC bits in
> UTCL1_UTCL0_INVREQ_DISABLE */
> > +			utcl_invreq_disable = 0;
> > +
> > +			for (k = 0; k < max_wgp_per_sh; k++) {
> > +				if (!(wgp_active_bitmap & (1 << k))) {
> > +					gcrd_targets_disable_tcp |= 3 << (2 *
> k);
> > +					gcrd_targets_disable_tcp |= 1 << (k +
> (max_wgp_per_sh * 2));
> > +					utcl_invreq_disable |= (3 << (2 * k)) |
> > +						(3 << (2 * (max_wgp_per_sh
> + k)));
> >   				}
> > -
> > -				tmp = RREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE);
> > -				/* only override TCP & SQC bits */
> > -				tmp &= 0xffffffff << (4 * max_wgp_per_sh);
> > -				tmp |= (utcl_invreq_disable &
> utcl_invreq_disable_mask);
> > -				WREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> > -
> > -				tmp = RREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE);
> > -				/* only override TCP bits */
> > -				tmp &= 0xffffffff << (2 * max_wgp_per_sh);
> > -				tmp |= (gcrd_targets_disable_tcp &
> gcrd_targets_disable_mask);
> > -				WREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE, tmp);
> >   			}
> > -		}
> >
> > -		gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> > -		mutex_unlock(&adev->grbm_idx_mutex);
> > +			tmp = RREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE);
> > +			/* only override TCP & SQC bits */
> > +			if (adev->asic_type == CHIP_NAVI14)
> > +				tmp &= 0xff000000;
> > +			else
> > +				tmp &=0xfff00000;
> 
> For the disable field mask calculation (which is the value that is 
> applied finally), there is no ASIC check. The above code may utilize 
> the same method as in the original code without ASIC check.
> 
> tmp &= 0xffffffff << (4 * max_wgp_per_sh);
> 
> Same for below case also - 3*max_wgp_per_sh.
> 
> Thanks,
> Lijo
> 
> > +			tmp |= (utcl_invreq_disable &
> utcl_invreq_disable_mask);
> > +			WREG32_SOC15(GC, 0,
> mmUTCL1_UTCL0_INVREQ_DISABLE, tmp);
> > +
> > +			tmp = RREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE);
> > +			/* only override TCP & SQC bits */
> > +			if (adev->asic_type == CHIP_NAVI14)
> > +				tmp &= 0xfffc0000;
> > +			else
> > +				tmp &= 0xffff8000;
> > +			tmp |= (gcrd_targets_disable_tcp &
> gcrd_targets_disable_mask);
> > +			WREG32_SOC15(GC, 0,
> mmGCRD_SA_TARGETS_DISABLE, tmp);
> > +		}
> >   	}
> > +
> > +	gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
> > +	mutex_unlock(&adev->grbm_idx_mutex);
> >   }
> >
> >   static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) @@
> > -7408,7 +7411,10 @@ static int gfx_v10_0_hw_init(void *handle)
> >   	 * init golden registers and rlc resume may override some registers,
> >   	 * reconfig them here
> >   	 */
> > -	gfx_v10_0_tcp_harvest(adev);
> > +	if (adev->asic_type == CHIP_NAVI10 ||
> > +	    adev->asic_type == CHIP_NAVI14 ||
> > +	    adev->asic_type == CHIP_NAVI12)
> > +		gfx_v10_0_tcp_harvest(adev);
> >
> >   	r = gfx_v10_0_cp_resume(adev);
> >   	if (r)
> > @@ -9328,17 +9334,22 @@ static void
> > gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device
> *
> >
> >   static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct
> amdgpu_device *adev)
> >   {
> > -	u32 data, wgp_bitmask;
> > -	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
> > -	data |= RREG32_SOC15(GC, 0,
> mmGC_USER_SHADER_ARRAY_CONFIG);
> > +	u32 disabled_mask =
> > +		~amdgpu_gfx_create_bitmask(adev-
> >gfx.config.max_cu_per_sh >> 1);
> > +	u32 efuse_setting = 0;
> > +	u32 vbios_setting = 0;
> > +
> > +	efuse_setting = RREG32_SOC15(GC, 0,
> mmCC_GC_SHADER_ARRAY_CONFIG);
> > +	efuse_setting &=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > +	efuse_setting >>=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> >
> > -	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > -	data >>=
> CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> > +	vbios_setting = RREG32_SOC15(GC, 0,
> mmGC_USER_SHADER_ARRAY_CONFIG);
> > +	vbios_setting &=
> GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
> > +	vbios_setting >>=
> GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
> >
> > -	wgp_bitmask =
> > -		amdgpu_gfx_create_bitmask(adev-
> >gfx.config.max_cu_per_sh >> 1);
> > +	disabled_mask |= efuse_setting | vbios_setting;
> >
> > -	return (~data) & wgp_bitmask;
> > +	return (~disabled_mask);
> >   }
> >
> >   static u32 gfx_v10_0_get_cu_active_bitmap_per_sh(struct
> > amdgpu_device *adev)
> >
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
  2021-06-22  6:08     ` Lazar, Lijo
@ 2021-06-22  9:19       ` Michel Dänzer
  2021-06-22 10:40         ` Lazar, Lijo
  0 siblings, 1 reply; 14+ messages in thread
From: Michel Dänzer @ 2021-06-22  9:19 UTC (permalink / raw)
  To: Lazar, Lijo, Quan, Evan; +Cc: Deucher, Alexander, amd-gfx

On 2021-06-22 8:08 a.m., Lazar, Lijo wrote:
> [Public]
> 
> AFAIK, that expression is legal (some code analyzer may warn on value of 4*max_wgp_per_sh); similar kind is used in rotate shift operations.

The default type for constants in C is int, so 0xffffffff is a 32-bit signed integer.

The C99 specification lists this under J.2 Undefined behavior:

— An expression having signed promoted type is left-shifted and either the value of the
 expression is negative or the result of shifting would be not be representable in the
 promoted type (6.5.7).

So it would be safer to make it unsigned: 0xffffffffu (or just ~0u).


> -----Original Message-----
> From: Quan, Evan <Evan.Quan@amd.com> 
> Sent: Tuesday, June 22, 2021 7:56 AM
> To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> Subject: RE: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
> 
> [AMD Official Use Only]
> 
> Thanks Lijo.
> However, I'm not quite sure whether " 0xffffffff << (4 * max_wgp_per_sh);" is a valid expression since it kind of triggers some overflow.
> Can that work for non-x86 platform or even work reliably for x86 platform?



-- 
Earthling Michel Dänzer               |               https://redhat.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
  2021-06-22  9:19       ` Michel Dänzer
@ 2021-06-22 10:40         ` Lazar, Lijo
  2021-06-23  2:04           ` Quan, Evan
  2021-06-23  6:53           ` Michel Dänzer
  0 siblings, 2 replies; 14+ messages in thread
From: Lazar, Lijo @ 2021-06-22 10:40 UTC (permalink / raw)
  To: Michel Dänzer, Quan, Evan; +Cc: Deucher, Alexander, amd-gfx



On 6/22/2021 2:49 PM, Michel Dänzer wrote:
> On 2021-06-22 8:08 a.m., Lazar, Lijo wrote:
>> [Public]
>>
>> AFAIK, that expression is legal (some code analyzer may warn on value of 4*max_wgp_per_sh); similar kind is used in rotate shift operations.
> 
> The default type for constants in C is int, so 0xffffffff is a 32-bit signed integer.

Probably not as per section 6.4.4.

"The type of an integer constant is the first of the corresponding list 
in which its value can be represented."

It is a hexadecimal constant and the first to fit this value is unsigned 
int. Regardless, adding u suffix will avoid any ambiguity.

Thanks,
Lijo

> 
> The C99 specification lists this under J.2 Undefined behavior:
> 
> — An expression having signed promoted type is left-shifted and either the value of the
>   expression is negative or the result of shifting would be not be representable in the
>   promoted type (6.5.7).
> 
> So it would be safer to make it unsigned: 0xffffffffu (or just ~0u).
> 
> 
>> -----Original Message-----
>> From: Quan, Evan <Evan.Quan@amd.com>
>> Sent: Tuesday, June 22, 2021 7:56 AM
>> To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
>> Subject: RE: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
>>
>> [AMD Official Use Only]
>>
>> Thanks Lijo.
>> However, I'm not quite sure whether " 0xffffffff << (4 * max_wgp_per_sh);" is a valid expression since it kind of triggers some overflow.
>> Can that work for non-x86 platform or even work reliably for x86 platform?
> 
> 
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
  2021-06-22 10:40         ` Lazar, Lijo
@ 2021-06-23  2:04           ` Quan, Evan
  2021-06-23  6:53           ` Michel Dänzer
  1 sibling, 0 replies; 14+ messages in thread
From: Quan, Evan @ 2021-06-23  2:04 UTC (permalink / raw)
  To: Lazar, Lijo, Michel Dänzer; +Cc: Deucher, Alexander, amd-gfx

[AMD Official Use Only]

Thanks for the information. I will use 0xffffffffu then.

BR
Evan
> -----Original Message-----
> From: Lazar, Lijo <Lijo.Lazar@amd.com>
> Sent: Tuesday, June 22, 2021 6:40 PM
> To: Michel Dänzer <michel@daenzer.net>; Quan, Evan
> <Evan.Quan@amd.com>
> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>; amd-
> gfx@lists.freedesktop.org
> Subject: Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
> 
> 
> 
> On 6/22/2021 2:49 PM, Michel Dänzer wrote:
> > On 2021-06-22 8:08 a.m., Lazar, Lijo wrote:
> >> [Public]
> >>
> >> AFAIK, that expression is legal (some code analyzer may warn on value of
> 4*max_wgp_per_sh); similar kind is used in rotate shift operations.
> >
> > The default type for constants in C is int, so 0xffffffff is a 32-bit signed
> integer.
> 
> Probably not as per section 6.4.4.
> 
> "The type of an integer constant is the first of the corresponding list in which
> its value can be represented."
> 
> It is a hexadecimal constant and the first to fit this value is unsigned int.
> Regardless, adding u suffix will avoid any ambiguity.
> 
> Thanks,
> Lijo
> 
> >
> > The C99 specification lists this under J.2 Undefined behavior:
> >
> > — An expression having signed promoted type is left-shifted and either
> the value of the
> >   expression is negative or the result of shifting would be not be
> representable in the
> >   promoted type (6.5.7).
> >
> > So it would be safer to make it unsigned: 0xffffffffu (or just ~0u).
> >
> >
> >> -----Original Message-----
> >> From: Quan, Evan <Evan.Quan@amd.com>
> >> Sent: Tuesday, June 22, 2021 7:56 AM
> >> To: Lazar, Lijo <Lijo.Lazar@amd.com>; amd-gfx@lists.freedesktop.org
> >> Cc: Deucher, Alexander <Alexander.Deucher@amd.com>
> >> Subject: RE: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
> >>
> >> [AMD Official Use Only]
> >>
> >> Thanks Lijo.
> >> However, I'm not quite sure whether " 0xffffffff << (4 *
> max_wgp_per_sh);" is a valid expression since it kind of triggers some
> overflow.
> >> Can that work for non-x86 platform or even work reliably for x86 platform?
> >
> >
> >
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting
  2021-06-22 10:40         ` Lazar, Lijo
  2021-06-23  2:04           ` Quan, Evan
@ 2021-06-23  6:53           ` Michel Dänzer
  1 sibling, 0 replies; 14+ messages in thread
From: Michel Dänzer @ 2021-06-23  6:53 UTC (permalink / raw)
  To: Lazar, Lijo, Quan, Evan; +Cc: Deucher, Alexander, amd-gfx

On 2021-06-22 12:40 p.m., Lazar, Lijo wrote:
> On 6/22/2021 2:49 PM, Michel Dänzer wrote:
>> On 2021-06-22 8:08 a.m., Lazar, Lijo wrote:
>>> [Public]
>>>
>>> AFAIK, that expression is legal (some code analyzer may warn on value of 4*max_wgp_per_sh); similar kind is used in rotate shift operations.
>>
>> The default type for constants in C is int, so 0xffffffff is a 32-bit signed integer.
> 
> Probably not as per section 6.4.4.
> 
> "The type of an integer constant is the first of the corresponding list in which its value can be represented."
> 
> It is a hexadecimal constant and the first to fit this value is unsigned int.

Ah, indeed, thanks.


-- 
Earthling Michel Dänzer               |               https://redhat.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-06-23  6:53 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-21  7:00 [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Evan Quan
2021-06-21  7:00 ` [PATCH V3 2/7] drm/amdgpu: fix Navi1x tcp power gating hang when issuing lightweight invalidaiton Evan Quan
2021-06-21  7:00 ` [PATCH V3 3/7] drm/amdgpu: fix NAK-G generation during PCI-e link width switch Evan Quan
2021-06-21  7:00 ` [PATCH V3 4/7] drm/amdgpu: fix the hang caused by PCIe " Evan Quan
2021-06-21  7:00 ` [PATCH V3 5/7] drm/amdgpu: correct clock gating settings on feature unsupported Evan Quan
2021-06-21  7:00 ` [PATCH V3 6/7] drm/amdgpu: update GFX MGCG settings Evan Quan
2021-06-21  7:00 ` [PATCH V3 7/7] drm/amdgpu: update HDP LS settings Evan Quan
2021-06-21 12:07 ` [PATCH V3 1/7] drm/amdgpu: correct tcp harvest setting Lazar, Lijo
2021-06-22  2:25   ` Quan, Evan
2021-06-22  6:08     ` Lazar, Lijo
2021-06-22  9:19       ` Michel Dänzer
2021-06-22 10:40         ` Lazar, Lijo
2021-06-23  2:04           ` Quan, Evan
2021-06-23  6:53           ` Michel Dänzer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.