All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
@ 2023-04-03  1:59 YiPeng Chai
  2023-04-03  1:59 ` [PATCH 2/2] drm/amdgpu: optimize redundant code in umc_v6_7 YiPeng Chai
  2023-04-03  3:44 ` [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10 Zhou1, Tao
  0 siblings, 2 replies; 6+ messages in thread
From: YiPeng Chai @ 2023-04-03  1:59 UTC (permalink / raw)
  To: amd-gfx
  Cc: Tao.Zhou1, YiPeng Chai, Stanley.Yang, yipechai, Candice.Li,
	Hawking.Zhang

Optimize redundant code in umc_v8_10.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c |  31 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |   7 +
 drivers/gpu/drm/amd/amdgpu/umc_v8_10.c  | 197 +++++++++---------------
 3 files changed, 115 insertions(+), 120 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 9e2e97207e53..734442315cf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -302,3 +302,34 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
 
 	err_data->err_addr_cnt++;
 }
+
+int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
+			umc_func func, void *data)
+{
+	uint32_t node_inst       = 0;
+	uint32_t umc_inst        = 0;
+	uint32_t ch_inst         = 0;
+	int ret = 0;
+
+	if (adev->umc.node_inst_num) {
+		LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
+			ret = func(adev, node_inst, umc_inst, ch_inst, data);
+			if (ret) {
+				dev_err(adev->dev, "Node %d umc %d ch %d func returns %d\n",
+					node_inst, umc_inst, ch_inst, ret);
+				return ret;
+			}
+		}
+	} else {
+		LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
+			ret = func(adev, 0, umc_inst, ch_inst, data);
+			if (ret) {
+				dev_err(adev->dev, "Umc %d ch %d func returns %d\n",
+					umc_inst, ch_inst, ret);
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index d7f1229ff11f..f279c8057f96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -47,6 +47,10 @@
 #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
 		LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
 
+
+typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
+			uint32_t umc_inst, uint32_t ch_inst, void *data);
+
 struct amdgpu_umc_ras {
 	struct amdgpu_ras_block_object ras_block;
 	void (*err_cnt_init)(struct amdgpu_device *adev);
@@ -104,4 +108,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
 		struct amdgpu_iv_entry *entry);
 int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
 			uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
+
+int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
+			umc_func func, void *data);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
index fb55e8cb9967..6dff313ac04c 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -76,10 +76,13 @@ static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
 		UMC_8_NODE_DIST * node_inst;
 }
 
-static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
-					uint32_t umc_reg_offset)
+static int umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
 {
 	uint32_t ecc_err_cnt_addr;
+	uint32_t umc_reg_offset =
+		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
 
 	ecc_err_cnt_addr =
 		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
@@ -87,24 +90,14 @@ static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
 	/* clear error count */
 	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
 			UMC_V8_10_CE_CNT_INIT);
+
+	return 0;
 }
 
 static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)
 {
-	uint32_t node_inst       = 0;
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-	uint32_t umc_reg_offset  = 0;
-
-	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
-		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
-						node_inst,
-						umc_inst,
-						ch_inst);
-
-		umc_v8_10_clear_error_count_per_channel(adev,
-						umc_reg_offset);
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v8_10_clear_error_count_per_channel, NULL);
 }
 
 static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev,
@@ -147,29 +140,29 @@ static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev
 		*error_count += 1;
 }
 
+static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	uint32_t umc_reg_offset =
+		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
+
+	umc_v8_10_query_correctable_error_count(adev,
+					umc_reg_offset,
+					&(err_data->ce_count));
+	umc_v8_10_query_uncorrectable_error_count(adev,
+					umc_reg_offset,
+					&(err_data->ue_count));
+
+	return 0;
+}
+
 static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
 					   void *ras_error_status)
 {
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
-	uint32_t node_inst       = 0;
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-	uint32_t umc_reg_offset  = 0;
-
-	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
-		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
-						node_inst,
-						umc_inst,
-						ch_inst);
-
-		umc_v8_10_query_correctable_error_count(adev,
-						umc_reg_offset,
-						&(err_data->ce_count));
-		umc_v8_10_query_uncorrectable_error_count(adev,
-						umc_reg_offset,
-						&(err_data->ue_count));
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v8_10_query_ecc_error_count, ras_error_status);
 
 	umc_v8_10_clear_error_count(adev);
 }
@@ -248,28 +241,28 @@ static void umc_v8_10_convert_error_address(struct amdgpu_device *adev,
 	}
 }
 
-static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
-					 struct ras_err_data *err_data,
-					 uint32_t umc_reg_offset,
-					 uint32_t node_inst,
-					 uint32_t ch_inst,
-					 uint32_t umc_inst)
+static int umc_v8_10_query_error_address(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
 {
 	uint64_t mc_umc_status_addr;
 	uint64_t mc_umc_status, err_addr;
 	uint64_t mc_umc_addrt0;
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	uint32_t umc_reg_offset =
+		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
 
 	mc_umc_status_addr =
 		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
 	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
 
 	if (mc_umc_status == 0)
-		return;
+		return 0;
 
 	if (!err_data->err_addr) {
 		/* clear umc status */
 		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
-		return;
+		return 0;
 	}
 
 	/* calculate error address if ue error is detected */
@@ -287,37 +280,25 @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
 
 	/* clear umc status */
 	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+	return 0;
 }
 
 static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
 					     void *ras_error_status)
 {
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-	uint32_t node_inst       = 0;
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-	uint32_t umc_reg_offset  = 0;
-
-	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
-		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
-						node_inst,
-						umc_inst,
-						ch_inst);
-
-		umc_v8_10_query_error_address(adev,
-					err_data,
-					umc_reg_offset,
-					node_inst,
-					ch_inst,
-					umc_inst);
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v8_10_query_error_address, ras_error_status);
 }
 
-static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
-					      uint32_t umc_reg_offset)
+static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
 {
 	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
 	uint32_t ecc_err_cnt_addr;
+	uint32_t umc_reg_offset =
+		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
 
 	ecc_err_cnt_sel_addr =
 		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
@@ -332,23 +313,14 @@ static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
 	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
 	/* set error count to initial value */
 	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT);
+
+	return 0;
 }
 
 static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)
 {
-	uint32_t node_inst       = 0;
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-	uint32_t umc_reg_offset  = 0;
-
-	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
-		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
-						node_inst,
-						umc_inst,
-						ch_inst);
-
-		umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset);
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v8_10_err_cnt_init_per_channel, NULL);
 }
 
 static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev)
@@ -406,37 +378,35 @@ static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_dev
 	}
 }
 
+static int umc_v8_10_ecc_info_query_ecc_error_count(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+
+	umc_v8_10_ecc_info_query_correctable_error_count(adev,
+					node_inst, umc_inst, ch_inst,
+					&(err_data->ce_count));
+	umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
+					node_inst, umc_inst, ch_inst,
+					&(err_data->ue_count));
+	return 0;
+}
+
 static void umc_v8_10_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
 					void *ras_error_status)
 {
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
-	uint32_t node_inst       = 0;
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-
-	/* TODO: driver needs to toggle DF Cstate to ensure
-	 * safe access of UMC registers. Will add the protection
-	 */
-	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
-		umc_v8_10_ecc_info_query_correctable_error_count(adev,
-							node_inst, umc_inst, ch_inst,
-							&(err_data->ce_count));
-		umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
-							node_inst, umc_inst, ch_inst,
-							&(err_data->ue_count));
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v8_10_ecc_info_query_ecc_error_count, ras_error_status);
 }
 
-static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
-					struct ras_err_data *err_data,
-					uint32_t ch_inst,
-					uint32_t umc_inst,
-					uint32_t node_inst)
+static int umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
 {
 	uint32_t eccinfo_table_idx;
 	uint64_t mc_umc_status, err_addr;
-
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
 
 	eccinfo_table_idx = node_inst * adev->umc.umc_inst_num *
@@ -447,10 +417,10 @@ static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
 	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
 
 	if (mc_umc_status == 0)
-		return;
+		return 0;
 
 	if (!err_data->err_addr)
-		return;
+		return 0;
 
 	/* calculate error address if ue error is detected */
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
@@ -463,28 +433,15 @@ static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
 		umc_v8_10_convert_error_address(adev, err_data, err_addr,
 					ch_inst, umc_inst, node_inst, mc_umc_status);
 	}
+
+	return 0;
 }
 
 static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
 					void *ras_error_status)
 {
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
-	uint32_t node_inst       = 0;
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-
-	/* TODO: driver needs to toggle DF Cstate to ensure
-	 * safe access of UMC resgisters. Will add the protection
-	 * when firmware interface is ready
-	 */
-	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
-		umc_v8_10_ecc_info_query_error_address(adev,
-						err_data,
-						ch_inst,
-						umc_inst,
-						node_inst);
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v8_10_ecc_info_query_error_address, ras_error_status);
 }
 
 const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] drm/amdgpu: optimize redundant code in umc_v6_7
  2023-04-03  1:59 [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10 YiPeng Chai
@ 2023-04-03  1:59 ` YiPeng Chai
  2023-04-03  3:44 ` [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10 Zhou1, Tao
  1 sibling, 0 replies; 6+ messages in thread
From: YiPeng Chai @ 2023-04-03  1:59 UTC (permalink / raw)
  To: amd-gfx
  Cc: Tao.Zhou1, YiPeng Chai, Stanley.Yang, yipechai, Candice.Li,
	Hawking.Zhang

Optimize redundant code in umc_v6_7.

Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 162 +++++++++++---------------
 1 file changed, 71 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index e08e25a3a1a9..f360362ba5dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -160,24 +160,28 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev
 	}
 }
 
+static int umc_v6_7_ecc_info_querry_ecc_error_count(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+
+	umc_v6_7_ecc_info_query_correctable_error_count(adev,
+		umc_inst, ch_inst,
+		&(err_data->ce_count));
+
+	umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev,
+		umc_inst, ch_inst,
+		&(err_data->ue_count));
+
+	return 0;
+}
+
 static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
 					   void *ras_error_status)
 {
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-
-	/*TODO: driver needs to toggle DF Cstate to ensure
-	 * safe access of UMC registers. Will add the protection */
-	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
-		umc_v6_7_ecc_info_query_correctable_error_count(adev,
-						      umc_inst, ch_inst,
-						      &(err_data->ce_count));
-		umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev,
-						      umc_inst, ch_inst,
-							  &(err_data->ue_count));
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v6_7_ecc_info_querry_ecc_error_count, ras_error_status);
 }
 
 void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
@@ -215,23 +219,23 @@ void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
 	}
 }
 
-static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
-					 struct ras_err_data *err_data,
-					 uint32_t ch_inst,
-					 uint32_t umc_inst)
+static int umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
 {
 	uint64_t mc_umc_status, err_addr;
 	uint32_t eccinfo_table_idx;
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
 
 	eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
 	mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
 
 	if (mc_umc_status == 0)
-		return;
+		return 0;
 
 	if (!err_data->err_addr)
-		return;
+		return 0;
 
 	/* calculate error address if ue error is detected */
 	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
@@ -243,25 +247,15 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
 		umc_v6_7_convert_error_address(adev, err_data, err_addr,
 					ch_inst, umc_inst);
 	}
+
+	return 0;
 }
 
 static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
 					     void *ras_error_status)
 {
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-
-	/*TODO: driver needs to toggle DF Cstate to ensure
-	 * safe access of UMC resgisters. Will add the protection
-	 * when firmware interface is ready */
-	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
-		umc_v6_7_ecc_info_query_error_address(adev,
-					     err_data,
-					     ch_inst,
-					     umc_inst);
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+	    umc_v6_7_ecc_info_query_error_address, ras_error_status);
 }
 
 static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
@@ -364,11 +358,14 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev
 	}
 }
 
-static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
-						   uint32_t umc_reg_offset)
+static int umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
 {
 	uint32_t ecc_err_cnt_addr;
 	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+	uint32_t umc_reg_offset =
+		get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
 
 	ecc_err_cnt_sel_addr =
 		SOC15_REG_OFFSET(UMC, 0,
@@ -402,58 +399,54 @@ static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev,
 	/* clear higher chip error count */
 	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
 			UMC_V6_7_CE_CNT_INIT);
+
+	return 0;
 }
 
 static void umc_v6_7_reset_error_count(struct amdgpu_device *adev)
 {
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-	uint32_t umc_reg_offset  = 0;
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v6_7_reset_error_count_per_channel, NULL);
+}
 
-	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
-		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
-							 umc_inst,
-							 ch_inst);
+static int umc_v6_7_query_ecc_error_count(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
+{
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	uint32_t umc_reg_offset =
+		get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
 
-		umc_v6_7_reset_error_count_per_channel(adev,
-						       umc_reg_offset);
-	}
+	umc_v6_7_query_correctable_error_count(adev,
+					umc_reg_offset,
+					&(err_data->ce_count),
+					ch_inst, umc_inst);
+
+	umc_v6_7_querry_uncorrectable_error_count(adev,
+					umc_reg_offset,
+					&(err_data->ue_count));
+
+	return 0;
 }
 
 static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev,
 					   void *ras_error_status)
 {
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-	uint32_t umc_reg_offset  = 0;
-
-	/*TODO: driver needs to toggle DF Cstate to ensure
-	 * safe access of UMC registers. Will add the protection */
-	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
-		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
-							 umc_inst,
-							 ch_inst);
-		umc_v6_7_query_correctable_error_count(adev,
-						       umc_reg_offset,
-						       &(err_data->ce_count),
-						       ch_inst, umc_inst);
-		umc_v6_7_querry_uncorrectable_error_count(adev,
-							  umc_reg_offset,
-							  &(err_data->ue_count));
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v6_7_query_ecc_error_count, ras_error_status);
 
 	umc_v6_7_reset_error_count(adev);
 }
 
-static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
-					 struct ras_err_data *err_data,
-					 uint32_t umc_reg_offset, uint32_t ch_inst,
-					 uint32_t umc_inst)
+static int umc_v6_7_query_error_address(struct amdgpu_device *adev,
+					uint32_t node_inst, uint32_t umc_inst,
+					uint32_t ch_inst, void *data)
 {
 	uint32_t mc_umc_status_addr;
 	uint64_t mc_umc_status = 0, mc_umc_addrt0, err_addr;
+	struct ras_err_data *err_data = (struct ras_err_data *)data;
+	uint32_t umc_reg_offset =
+		get_umc_v6_7_reg_offset(adev, umc_inst, ch_inst);
 
 	mc_umc_status_addr =
 		SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -463,12 +456,12 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
 	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
 
 	if (mc_umc_status == 0)
-		return;
+		return 0;
 
 	if (!err_data->err_addr) {
 		/* clear umc status */
 		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
-		return;
+		return 0;
 	}
 
 	/* calculate error address if ue error is detected */
@@ -484,29 +477,16 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
 
 	/* clear umc status */
 	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
+
+	return 0;
 }
 
+
 static void umc_v6_7_query_ras_error_address(struct amdgpu_device *adev,
 					     void *ras_error_status)
 {
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-
-	uint32_t umc_inst        = 0;
-	uint32_t ch_inst         = 0;
-	uint32_t umc_reg_offset  = 0;
-
-	/*TODO: driver needs to toggle DF Cstate to ensure
-	 * safe access of UMC resgisters. Will add the protection
-	 * when firmware interface is ready */
-	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
-		umc_reg_offset = get_umc_v6_7_reg_offset(adev,
-							 umc_inst,
-							 ch_inst);
-		umc_v6_7_query_error_address(adev,
-					     err_data,
-					     umc_reg_offset, ch_inst,
-					     umc_inst);
-	}
+	amdgpu_umc_scan_all_umc_channels(adev,
+		umc_v6_7_query_error_address, ras_error_status);
 }
 
 static uint32_t umc_v6_7_query_ras_poison_mode_per_channel(
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
  2023-04-03  1:59 [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10 YiPeng Chai
  2023-04-03  1:59 ` [PATCH 2/2] drm/amdgpu: optimize redundant code in umc_v6_7 YiPeng Chai
@ 2023-04-03  3:44 ` Zhou1, Tao
  2023-04-03  7:00   ` Chai, Thomas
  1 sibling, 1 reply; 6+ messages in thread
From: Zhou1, Tao @ 2023-04-03  3:44 UTC (permalink / raw)
  To: Chai, Thomas, amd-gfx; +Cc: Li, Candice, Yang, Stanley, Zhang, Hawking

[AMD Official Use Only - General]



> -----Original Message-----
> From: Chai, Thomas <YiPeng.Chai@amd.com>
> Sent: Monday, April 3, 2023 9:59 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Chai, Thomas <YiPeng.Chai@amd.com>; Zhang, Hawking
> <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Li, Candice
> <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>; Chai,
> Thomas <YiPeng.Chai@amd.com>
> Subject: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
> 
> Optimize redundant code in umc_v8_10
> 
> Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c |  31 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |   7 +
>  drivers/gpu/drm/amd/amdgpu/umc_v8_10.c  | 197 +++++++++---------------
>  3 files changed, 115 insertions(+), 120 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> index 9e2e97207e53..734442315cf6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> @@ -302,3 +302,34 @@ void amdgpu_umc_fill_error_record(struct
> ras_err_data *err_data,
> 
>  	err_data->err_addr_cnt++;
>  }
> +
> +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> +			umc_func func, void *data)
> +{
> +	uint32_t node_inst       = 0;
> +	uint32_t umc_inst        = 0;
> +	uint32_t ch_inst         = 0;
> +	int ret = 0;
> +
> +	if (adev->umc.node_inst_num) {
> +		LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst,
> ch_inst) {
> +			ret = func(adev, node_inst, umc_inst, ch_inst, data);
> +			if (ret) {
> +				dev_err(adev->dev, "Node %d umc %d ch %d
> func returns %d\n",
> +					node_inst, umc_inst, ch_inst, ret);
> +				return ret;
> +			}
> +		}
> +	} else {
> +		LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {

[Tao] for ASIC which doesn't support node, can we set its node_inst_num to 1 and retire the macro LOOP_UMC_INST_AND_CH?

> +			ret = func(adev, 0, umc_inst, ch_inst, data);
> +			if (ret) {
> +				dev_err(adev->dev, "Umc %d ch %d func
> returns %d\n",
> +					umc_inst, ch_inst, ret);
> +				return ret;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index d7f1229ff11f..f279c8057f96 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -47,6 +47,10 @@
>  #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> \
>  		LOOP_UMC_NODE_INST((node_inst))
> LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
> 
> +
> +typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
> +			uint32_t umc_inst, uint32_t ch_inst, void *data);
> +
>  struct amdgpu_umc_ras {
>  	struct amdgpu_ras_block_object ras_block;
>  	void (*err_cnt_init)(struct amdgpu_device *adev); @@ -104,4 +108,7
> @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
>  		struct amdgpu_iv_entry *entry);
>  int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
>  			uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
> +
> +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> +			umc_func func, void *data);
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> index fb55e8cb9967..6dff313ac04c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> @@ -76,10 +76,13 @@ static inline uint32_t get_umc_v8_10_reg_offset(struct
> amdgpu_device *adev,
>  		UMC_8_NODE_DIST * node_inst;
>  }
> 
> -static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device
> *adev,
> -					uint32_t umc_reg_offset)
> +static int umc_v8_10_clear_error_count_per_channel(struct amdgpu_device
> *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
>  {
>  	uint32_t ecc_err_cnt_addr;
> +	uint32_t umc_reg_offset =
> +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> 
>  	ecc_err_cnt_addr =
>  		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); @@
> -87,24 +90,14 @@ static void
> umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
>  	/* clear error count */
>  	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
>  			UMC_V8_10_CE_CNT_INIT);
> +
> +	return 0;
>  }
> 
>  static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)  {
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -	uint32_t umc_reg_offset  = 0;
> -
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> -						node_inst,
> -						umc_inst,
> -						ch_inst);
> -
> -		umc_v8_10_clear_error_count_per_channel(adev,
> -						umc_reg_offset);
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_clear_error_count_per_channel, NULL);
>  }
> 
>  static void umc_v8_10_query_correctable_error_count(struct amdgpu_device
> *adev, @@ -147,29 +140,29 @@ static void
> umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev
>  		*error_count += 1;
>  }
> 
> +static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
> +{
> +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> +	uint32_t umc_reg_offset =
> +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> +
> +	umc_v8_10_query_correctable_error_count(adev,
> +					umc_reg_offset,
> +					&(err_data->ce_count));
> +	umc_v8_10_query_uncorrectable_error_count(adev,
> +					umc_reg_offset,
> +					&(err_data->ue_count));
> +
> +	return 0;
> +}
> +
>  static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
>  					   void *ras_error_status)
>  {
> -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -	uint32_t umc_reg_offset  = 0;
> -
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> -						node_inst,
> -						umc_inst,
> -						ch_inst);
> -
> -		umc_v8_10_query_correctable_error_count(adev,
> -						umc_reg_offset,
> -						&(err_data->ce_count));
> -		umc_v8_10_query_uncorrectable_error_count(adev,
> -						umc_reg_offset,
> -						&(err_data->ue_count));
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_query_ecc_error_count, ras_error_status);
> 
>  	umc_v8_10_clear_error_count(adev);
>  }
> @@ -248,28 +241,28 @@ static void umc_v8_10_convert_error_address(struct
> amdgpu_device *adev,
>  	}
>  }
> 
> -static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
> -					 struct ras_err_data *err_data,
> -					 uint32_t umc_reg_offset,
> -					 uint32_t node_inst,
> -					 uint32_t ch_inst,
> -					 uint32_t umc_inst)
> +static int umc_v8_10_query_error_address(struct amdgpu_device *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
>  {
>  	uint64_t mc_umc_status_addr;
>  	uint64_t mc_umc_status, err_addr;
>  	uint64_t mc_umc_addrt0;
> +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> +	uint32_t umc_reg_offset =
> +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> 
>  	mc_umc_status_addr =
>  		SOC15_REG_OFFSET(UMC, 0,
> regMCA_UMC_UMC0_MCUMC_STATUST0);
>  	mc_umc_status = RREG64_PCIE((mc_umc_status_addr +
> umc_reg_offset) * 4);
> 
>  	if (mc_umc_status == 0)
> -		return;
> +		return 0;
> 
>  	if (!err_data->err_addr) {
>  		/* clear umc status */
>  		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4,
> 0x0ULL);
> -		return;
> +		return 0;
>  	}
> 
>  	/* calculate error address if ue error is detected */ @@ -287,37 +280,25
> @@ static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
> 
>  	/* clear umc status */
>  	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> +
> +	return 0;
>  }
> 
>  static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
>  					     void *ras_error_status)
>  {
> -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -	uint32_t umc_reg_offset  = 0;
> -
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> -						node_inst,
> -						umc_inst,
> -						ch_inst);
> -
> -		umc_v8_10_query_error_address(adev,
> -					err_data,
> -					umc_reg_offset,
> -					node_inst,
> -					ch_inst,
> -					umc_inst);
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_query_error_address, ras_error_status);
>  }
> 
> -static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
> -					      uint32_t umc_reg_offset)
> +static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
>  {
>  	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
>  	uint32_t ecc_err_cnt_addr;
> +	uint32_t umc_reg_offset =
> +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> 
>  	ecc_err_cnt_sel_addr =
>  		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
> @@ -332,23 +313,14 @@ static void
> umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
>  	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
> ecc_err_cnt_sel);
>  	/* set error count to initial value */
>  	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
> UMC_V8_10_CE_CNT_INIT);
> +
> +	return 0;
>  }
> 
>  static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)  {
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -	uint32_t umc_reg_offset  = 0;
> -
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> -						node_inst,
> -						umc_inst,
> -						ch_inst);
> -
> -		umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset);
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_err_cnt_init_per_channel, NULL);
>  }
> 
>  static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device *adev)
> @@ -406,37 +378,35 @@ static void
> umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_dev
>  	}
>  }
> 
> +static int umc_v8_10_ecc_info_query_ecc_error_count(struct amdgpu_device
> *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
> +{
> +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> +
> +	umc_v8_10_ecc_info_query_correctable_error_count(adev,
> +					node_inst, umc_inst, ch_inst,
> +					&(err_data->ce_count));
> +	umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> +					node_inst, umc_inst, ch_inst,
> +					&(err_data->ue_count));
> +	return 0;
> +}
> +
>  static void umc_v8_10_ecc_info_query_ras_error_count(struct amdgpu_device
> *adev,
>  					void *ras_error_status)
>  {
> -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -
> -	/* TODO: driver needs to toggle DF Cstate to ensure
> -	 * safe access of UMC registers. Will add the protection
> -	 */
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_v8_10_ecc_info_query_correctable_error_count(adev,
> -							node_inst, umc_inst,
> ch_inst,
> -							&(err_data-
> >ce_count));
> -		umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> -							node_inst, umc_inst,
> ch_inst,
> -							&(err_data-
> >ue_count));
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_ecc_info_query_ecc_error_count,
> ras_error_status);
>  }
> 
> -static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> -					struct ras_err_data *err_data,
> -					uint32_t ch_inst,
> -					uint32_t umc_inst,
> -					uint32_t node_inst)
> +static int umc_v8_10_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
>  {
>  	uint32_t eccinfo_table_idx;
>  	uint64_t mc_umc_status, err_addr;
> -
> +	struct ras_err_data *err_data = (struct ras_err_data *)data;
>  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> 
>  	eccinfo_table_idx = node_inst * adev->umc.umc_inst_num * @@ -
> 447,10 +417,10 @@ static void
> umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
>  	mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> 
>  	if (mc_umc_status == 0)
> -		return;
> +		return 0;
> 
>  	if (!err_data->err_addr)
> -		return;
> +		return 0;
> 
>  	/* calculate error address if ue error is detected */
>  	if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -463,28 +433,15 @@
> static void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device
> *adev,
>  		umc_v8_10_convert_error_address(adev, err_data, err_addr,
>  					ch_inst, umc_inst, node_inst,
> mc_umc_status);
>  	}
> +
> +	return 0;
>  }
> 
>  static void umc_v8_10_ecc_info_query_ras_error_address(struct
> amdgpu_device *adev,
>  					void *ras_error_status)
>  {
> -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -
> -	/* TODO: driver needs to toggle DF Cstate to ensure
> -	 * safe access of UMC resgisters. Will add the protection
> -	 * when firmware interface is ready
> -	 */
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_v8_10_ecc_info_query_error_address(adev,
> -						err_data,
> -						ch_inst,
> -						umc_inst,
> -						node_inst);
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_ecc_info_query_error_address, ras_error_status);
>  }
> 
>  const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
> --
> 2.34.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
  2023-04-03  3:44 ` [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10 Zhou1, Tao
@ 2023-04-03  7:00   ` Chai, Thomas
  2023-04-03  7:21     ` Zhou1, Tao
  0 siblings, 1 reply; 6+ messages in thread
From: Chai, Thomas @ 2023-04-03  7:00 UTC (permalink / raw)
  To: Zhou1, Tao, amd-gfx; +Cc: Li, Candice, Yang, Stanley, Zhang, Hawking

[AMD Official Use Only - General]




-----------------
Best Regards,
Thomas

-----Original Message-----
From: Zhou1, Tao <Tao.Zhou1@amd.com> 
Sent: Monday, April 3, 2023 11:45 AM
To: Chai, Thomas <YiPeng.Chai@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Li, Candice <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>
Subject: RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10

[AMD Official Use Only - General]



> -----Original Message-----
> From: Chai, Thomas <YiPeng.Chai@amd.com>
> Sent: Monday, April 3, 2023 9:59 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Chai, Thomas <YiPeng.Chai@amd.com>; Zhang, Hawking 
> <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Li, Candice 
> <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>; Chai, 
> Thomas <YiPeng.Chai@amd.com>
> Subject: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
> 
> Optimize redundant code in umc_v8_10
> 
> Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c |  31 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |   7 +
>  drivers/gpu/drm/amd/amdgpu/umc_v8_10.c  | 197 
> +++++++++---------------
>  3 files changed, 115 insertions(+), 120 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> index 9e2e97207e53..734442315cf6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> @@ -302,3 +302,34 @@ void amdgpu_umc_fill_error_record(struct
> ras_err_data *err_data,
> 
>  	err_data->err_addr_cnt++;
>  }
> +
> +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> +			umc_func func, void *data)
> +{
> +	uint32_t node_inst       = 0;
> +	uint32_t umc_inst        = 0;
> +	uint32_t ch_inst         = 0;
> +	int ret = 0;
> +
> +	if (adev->umc.node_inst_num) {
> +		LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst,
> ch_inst) {
> +			ret = func(adev, node_inst, umc_inst, ch_inst, data);
> +			if (ret) {
> +				dev_err(adev->dev, "Node %d umc %d ch %d
> func returns %d\n",
> +					node_inst, umc_inst, ch_inst, ret);
> +				return ret;
> +			}
> +		}
> +	} else {
> +		LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {

>[Tao] for ASIC which doesn't support node, can we set its node_inst_num to 1 and retire the macro LOOP_UMC_INST_AND_CH?

[Thomas] I am afraid not.
	
	" #define LOOP_UMC_NODE_INST(node_inst) \
		for_each_set_bit((node_inst), &(adev->umc.active_mask), adev->umc.node_inst_num) "
	
	The node instance loop of LOOP_UMC_EACH_NODE_INST_AND_CH  supports node harvest, so node_inst_num is not the real node instance number.


> +			ret = func(adev, 0, umc_inst, ch_inst, data);
> +			if (ret) {
> +				dev_err(adev->dev, "Umc %d ch %d func
> returns %d\n",
> +					umc_inst, ch_inst, ret);
> +				return ret;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> index d7f1229ff11f..f279c8057f96 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> @@ -47,6 +47,10 @@
>  #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) 
> \
>  		LOOP_UMC_NODE_INST((node_inst))
> LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
> 
> +
> +typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
> +			uint32_t umc_inst, uint32_t ch_inst, void *data);
> +
>  struct amdgpu_umc_ras {
>  	struct amdgpu_ras_block_object ras_block;
>  	void (*err_cnt_init)(struct amdgpu_device *adev); @@ -104,4 +108,7 
> @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
>  		struct amdgpu_iv_entry *entry);
>  int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
>  			uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
> +
> +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> +			umc_func func, void *data);
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> index fb55e8cb9967..6dff313ac04c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> @@ -76,10 +76,13 @@ static inline uint32_t 
> get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
>  		UMC_8_NODE_DIST * node_inst;
>  }
> 
> -static void umc_v8_10_clear_error_count_per_channel(struct 
> amdgpu_device *adev,
> -					uint32_t umc_reg_offset)
> +static int umc_v8_10_clear_error_count_per_channel(struct 
> +amdgpu_device
> *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
>  {
>  	uint32_t ecc_err_cnt_addr;
> +	uint32_t umc_reg_offset =
> +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> 
>  	ecc_err_cnt_addr =
>  		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); @@
> -87,24 +90,14 @@ static void
> umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
>  	/* clear error count */
>  	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
>  			UMC_V8_10_CE_CNT_INIT);
> +
> +	return 0;
>  }
> 
>  static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)  {
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -	uint32_t umc_reg_offset  = 0;
> -
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> -						node_inst,
> -						umc_inst,
> -						ch_inst);
> -
> -		umc_v8_10_clear_error_count_per_channel(adev,
> -						umc_reg_offset);
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_clear_error_count_per_channel, NULL);
>  }
> 
>  static void umc_v8_10_query_correctable_error_count(struct 
> amdgpu_device *adev, @@ -147,29 +140,29 @@ static void 
> umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev
>  		*error_count += 1;
>  }
> 
> +static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
> +{
> +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> +	uint32_t umc_reg_offset =
> +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> +
> +	umc_v8_10_query_correctable_error_count(adev,
> +					umc_reg_offset,
> +					&(err_data->ce_count));
> +	umc_v8_10_query_uncorrectable_error_count(adev,
> +					umc_reg_offset,
> +					&(err_data->ue_count));
> +
> +	return 0;
> +}
> +
>  static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
>  					   void *ras_error_status)
>  {
> -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -	uint32_t umc_reg_offset  = 0;
> -
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> -						node_inst,
> -						umc_inst,
> -						ch_inst);
> -
> -		umc_v8_10_query_correctable_error_count(adev,
> -						umc_reg_offset,
> -						&(err_data->ce_count));
> -		umc_v8_10_query_uncorrectable_error_count(adev,
> -						umc_reg_offset,
> -						&(err_data->ue_count));
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_query_ecc_error_count, ras_error_status);
> 
>  	umc_v8_10_clear_error_count(adev);
>  }
> @@ -248,28 +241,28 @@ static void 
> umc_v8_10_convert_error_address(struct
> amdgpu_device *adev,
>  	}
>  }
> 
> -static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
> -					 struct ras_err_data *err_data,
> -					 uint32_t umc_reg_offset,
> -					 uint32_t node_inst,
> -					 uint32_t ch_inst,
> -					 uint32_t umc_inst)
> +static int umc_v8_10_query_error_address(struct amdgpu_device *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
>  {
>  	uint64_t mc_umc_status_addr;
>  	uint64_t mc_umc_status, err_addr;
>  	uint64_t mc_umc_addrt0;
> +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> +	uint32_t umc_reg_offset =
> +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> 
>  	mc_umc_status_addr =
>  		SOC15_REG_OFFSET(UMC, 0,
> regMCA_UMC_UMC0_MCUMC_STATUST0);
>  	mc_umc_status = RREG64_PCIE((mc_umc_status_addr +
> umc_reg_offset) * 4);
> 
>  	if (mc_umc_status == 0)
> -		return;
> +		return 0;
> 
>  	if (!err_data->err_addr) {
>  		/* clear umc status */
>  		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> -		return;
> +		return 0;
>  	}
> 
>  	/* calculate error address if ue error is detected */ @@ -287,37 
> +280,25 @@ static void umc_v8_10_query_error_address(struct 
> amdgpu_device *adev,
> 
>  	/* clear umc status */
>  	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> +
> +	return 0;
>  }
> 
>  static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
>  					     void *ras_error_status)
>  {
> -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -	uint32_t umc_reg_offset  = 0;
> -
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> -						node_inst,
> -						umc_inst,
> -						ch_inst);
> -
> -		umc_v8_10_query_error_address(adev,
> -					err_data,
> -					umc_reg_offset,
> -					node_inst,
> -					ch_inst,
> -					umc_inst);
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_query_error_address, ras_error_status);
>  }
> 
> -static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
> -					      uint32_t umc_reg_offset)
> +static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
>  {
>  	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
>  	uint32_t ecc_err_cnt_addr;
> +	uint32_t umc_reg_offset =
> +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> 
>  	ecc_err_cnt_sel_addr =
>  		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel); @@ -332,23 
> +313,14 @@ static void umc_v8_10_err_cnt_init_per_channel(struct 
> amdgpu_device *adev,
>  	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, 
> ecc_err_cnt_sel);
>  	/* set error count to initial value */
>  	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, 
> UMC_V8_10_CE_CNT_INIT);
> +
> +	return 0;
>  }
> 
>  static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)  {
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -	uint32_t umc_reg_offset  = 0;
> -
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> -						node_inst,
> -						umc_inst,
> -						ch_inst);
> -
> -		umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset);
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_err_cnt_init_per_channel, NULL);
>  }
> 
>  static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device 
> *adev) @@ -406,37 +378,35 @@ static void 
> umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_dev
>  	}
>  }
> 
> +static int umc_v8_10_ecc_info_query_ecc_error_count(struct 
> +amdgpu_device
> *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
> +{
> +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> +
> +	umc_v8_10_ecc_info_query_correctable_error_count(adev,
> +					node_inst, umc_inst, ch_inst,
> +					&(err_data->ce_count));
> +	umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> +					node_inst, umc_inst, ch_inst,
> +					&(err_data->ue_count));
> +	return 0;
> +}
> +
>  static void umc_v8_10_ecc_info_query_ras_error_count(struct 
> amdgpu_device *adev,
>  					void *ras_error_status)
>  {
> -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -
> -	/* TODO: driver needs to toggle DF Cstate to ensure
> -	 * safe access of UMC registers. Will add the protection
> -	 */
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_v8_10_ecc_info_query_correctable_error_count(adev,
> -							node_inst, umc_inst,
> ch_inst,
> -							&(err_data-
> >ce_count));
> -		umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> -							node_inst, umc_inst,
> ch_inst,
> -							&(err_data-
> >ue_count));
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_ecc_info_query_ecc_error_count,
> ras_error_status);
>  }
> 
> -static void umc_v8_10_ecc_info_query_error_address(struct 
> amdgpu_device *adev,
> -					struct ras_err_data *err_data,
> -					uint32_t ch_inst,
> -					uint32_t umc_inst,
> -					uint32_t node_inst)
> +static int umc_v8_10_ecc_info_query_error_address(struct 
> +amdgpu_device
> *adev,
> +					uint32_t node_inst, uint32_t umc_inst,
> +					uint32_t ch_inst, void *data)
>  {
>  	uint32_t eccinfo_table_idx;
>  	uint64_t mc_umc_status, err_addr;
> -
> +	struct ras_err_data *err_data = (struct ras_err_data *)data;
>  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> 
>  	eccinfo_table_idx = node_inst * adev->umc.umc_inst_num * @@ -
> 447,10 +417,10 @@ static void
> umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
>  	mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> 
>  	if (mc_umc_status == 0)
> -		return;
> +		return 0;
> 
>  	if (!err_data->err_addr)
> -		return;
> +		return 0;
> 
>  	/* calculate error address if ue error is detected */
>  	if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -463,28 +433,15 @@ static 
> void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device 
> *adev,
>  		umc_v8_10_convert_error_address(adev, err_data, err_addr,
>  					ch_inst, umc_inst, node_inst,
> mc_umc_status);
>  	}
> +
> +	return 0;
>  }
> 
>  static void umc_v8_10_ecc_info_query_ras_error_address(struct
> amdgpu_device *adev,
>  					void *ras_error_status)
>  {
> -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> -
> -	uint32_t node_inst       = 0;
> -	uint32_t umc_inst        = 0;
> -	uint32_t ch_inst         = 0;
> -
> -	/* TODO: driver needs to toggle DF Cstate to ensure
> -	 * safe access of UMC resgisters. Will add the protection
> -	 * when firmware interface is ready
> -	 */
> -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> {
> -		umc_v8_10_ecc_info_query_error_address(adev,
> -						err_data,
> -						ch_inst,
> -						umc_inst,
> -						node_inst);
> -	}
> +	amdgpu_umc_scan_all_umc_channels(adev,
> +		umc_v8_10_ecc_info_query_error_address, ras_error_status);
>  }
> 
>  const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
> --
> 2.34.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
  2023-04-03  7:00   ` Chai, Thomas
@ 2023-04-03  7:21     ` Zhou1, Tao
  2023-04-03  7:25       ` Chai, Thomas
  0 siblings, 1 reply; 6+ messages in thread
From: Zhou1, Tao @ 2023-04-03  7:21 UTC (permalink / raw)
  To: Chai, Thomas, amd-gfx; +Cc: Li, Candice, Yang, Stanley, Zhang, Hawking

[AMD Official Use Only - General]



> -----Original Message-----
> From: Chai, Thomas <YiPeng.Chai@amd.com>
> Sent: Monday, April 3, 2023 3:00 PM
> To: Zhou1, Tao <Tao.Zhou1@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Li, Candice
> <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>
> Subject: RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
> 
> [AMD Official Use Only - General]
> 
> 
> 
> 
> -----------------
> Best Regards,
> Thomas
> 
> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1@amd.com>
> Sent: Monday, April 3, 2023 11:45 AM
> To: Chai, Thomas <YiPeng.Chai@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Li, Candice
> <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>
> Subject: RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
> 
> [AMD Official Use Only - General]
> 
> 
> 
> > -----Original Message-----
> > From: Chai, Thomas <YiPeng.Chai@amd.com>
> > Sent: Monday, April 3, 2023 9:59 AM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Chai, Thomas <YiPeng.Chai@amd.com>; Zhang, Hawking
> > <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Li,
> Candice
> > <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>; Chai,
> > Thomas <YiPeng.Chai@amd.com>
> > Subject: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
> >
> > Optimize redundant code in umc_v8_10
> >
> > Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c |  31 ++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |   7 +
> >  drivers/gpu/drm/amd/amdgpu/umc_v8_10.c  | 197
> > +++++++++---------------
> >  3 files changed, 115 insertions(+), 120 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > index 9e2e97207e53..734442315cf6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > @@ -302,3 +302,34 @@ void amdgpu_umc_fill_error_record(struct
> > ras_err_data *err_data,
> >
> >  	err_data->err_addr_cnt++;
> >  }
> > +
> > +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> > +			umc_func func, void *data)
> > +{
> > +	uint32_t node_inst       = 0;
> > +	uint32_t umc_inst        = 0;
> > +	uint32_t ch_inst         = 0;
> > +	int ret = 0;
> > +
> > +	if (adev->umc.node_inst_num) {
> > +		LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst,
> > ch_inst) {
> > +			ret = func(adev, node_inst, umc_inst, ch_inst, data);
> > +			if (ret) {
> > +				dev_err(adev->dev, "Node %d umc %d ch %d
> > func returns %d\n",
> > +					node_inst, umc_inst, ch_inst, ret);
> > +				return ret;
> > +			}
> > +		}
> > +	} else {
> > +		LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
> 
> >[Tao] for ASIC which doesn't support node, can we set its node_inst_num to 1
> and retire the macro LOOP_UMC_INST_AND_CH?
> 
> [Thomas] I am afraid not.
> 
> 	" #define LOOP_UMC_NODE_INST(node_inst) \
> 		for_each_set_bit((node_inst), &(adev->umc.active_mask),
> adev->umc.node_inst_num) "
> 
> 	The node instance loop of LOOP_UMC_EACH_NODE_INST_AND_CH
> supports node harvest, so node_inst_num is not the real node instance number.

[Tao] we can set both node_inst_num and active_mask to 1, but either way is fine for me.
BTW, I think amdgpu_umc_loop_channels is simpler than amdgpu_umc_scan_all_umc_channels; with this fixed, the series is:

Reviewed-by: Tao Zhou <tao.zhou1@amd.com>

> 
> 
> > +			ret = func(adev, 0, umc_inst, ch_inst, data);
> > +			if (ret) {
> > +				dev_err(adev->dev, "Umc %d ch %d func
> > returns %d\n",
> > +					umc_inst, ch_inst, ret);
> > +				return ret;
> > +			}
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> > index d7f1229ff11f..f279c8057f96 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> > @@ -47,6 +47,10 @@
> >  #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst,
> ch_inst)
> > \
> >  		LOOP_UMC_NODE_INST((node_inst))
> > LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
> >
> > +
> > +typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
> > +			uint32_t umc_inst, uint32_t ch_inst, void *data);
> > +
> >  struct amdgpu_umc_ras {
> >  	struct amdgpu_ras_block_object ras_block;
> >  	void (*err_cnt_init)(struct amdgpu_device *adev); @@ -104,4 +108,7
> > @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
> >  		struct amdgpu_iv_entry *entry);
> >  int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
> >  			uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
> > +
> > +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> > +			umc_func func, void *data);
> >  #endif
> > diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> > b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> > index fb55e8cb9967..6dff313ac04c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> > @@ -76,10 +76,13 @@ static inline uint32_t
> > get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
> >  		UMC_8_NODE_DIST * node_inst;
> >  }
> >
> > -static void umc_v8_10_clear_error_count_per_channel(struct
> > amdgpu_device *adev,
> > -					uint32_t umc_reg_offset)
> > +static int umc_v8_10_clear_error_count_per_channel(struct
> > +amdgpu_device
> > *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> >  {
> >  	uint32_t ecc_err_cnt_addr;
> > +	uint32_t umc_reg_offset =
> > +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> >
> >  	ecc_err_cnt_addr =
> >  		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); @@
> > -87,24 +90,14 @@ static void
> > umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
> >  	/* clear error count */
> >  	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
> >  			UMC_V8_10_CE_CNT_INIT);
> > +
> > +	return 0;
> >  }
> >
> >  static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)  {
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -	uint32_t umc_reg_offset  = 0;
> > -
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> > -						node_inst,
> > -						umc_inst,
> > -						ch_inst);
> > -
> > -		umc_v8_10_clear_error_count_per_channel(adev,
> > -						umc_reg_offset);
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_clear_error_count_per_channel, NULL);
> >  }
> >
> >  static void umc_v8_10_query_correctable_error_count(struct
> > amdgpu_device *adev, @@ -147,29 +140,29 @@ static void
> > umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev
> >  		*error_count += 1;
> >  }
> >
> > +static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> > +{
> > +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> > +	uint32_t umc_reg_offset =
> > +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> > +
> > +	umc_v8_10_query_correctable_error_count(adev,
> > +					umc_reg_offset,
> > +					&(err_data->ce_count));
> > +	umc_v8_10_query_uncorrectable_error_count(adev,
> > +					umc_reg_offset,
> > +					&(err_data->ue_count));
> > +
> > +	return 0;
> > +}
> > +
> >  static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
> >  					   void *ras_error_status)
> >  {
> > -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> > -
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -	uint32_t umc_reg_offset  = 0;
> > -
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> > -						node_inst,
> > -						umc_inst,
> > -						ch_inst);
> > -
> > -		umc_v8_10_query_correctable_error_count(adev,
> > -						umc_reg_offset,
> > -						&(err_data->ce_count));
> > -		umc_v8_10_query_uncorrectable_error_count(adev,
> > -						umc_reg_offset,
> > -						&(err_data->ue_count));
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_query_ecc_error_count, ras_error_status);
> >
> >  	umc_v8_10_clear_error_count(adev);
> >  }
> > @@ -248,28 +241,28 @@ static void
> > umc_v8_10_convert_error_address(struct
> > amdgpu_device *adev,
> >  	}
> >  }
> >
> > -static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
> > -					 struct ras_err_data *err_data,
> > -					 uint32_t umc_reg_offset,
> > -					 uint32_t node_inst,
> > -					 uint32_t ch_inst,
> > -					 uint32_t umc_inst)
> > +static int umc_v8_10_query_error_address(struct amdgpu_device *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> >  {
> >  	uint64_t mc_umc_status_addr;
> >  	uint64_t mc_umc_status, err_addr;
> >  	uint64_t mc_umc_addrt0;
> > +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> > +	uint32_t umc_reg_offset =
> > +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> >
> >  	mc_umc_status_addr =
> >  		SOC15_REG_OFFSET(UMC, 0,
> > regMCA_UMC_UMC0_MCUMC_STATUST0);
> >  	mc_umc_status = RREG64_PCIE((mc_umc_status_addr +
> > umc_reg_offset) * 4);
> >
> >  	if (mc_umc_status == 0)
> > -		return;
> > +		return 0;
> >
> >  	if (!err_data->err_addr) {
> >  		/* clear umc status */
> >  		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4,
> 0x0ULL);
> > -		return;
> > +		return 0;
> >  	}
> >
> >  	/* calculate error address if ue error is detected */ @@ -287,37
> > +280,25 @@ static void umc_v8_10_query_error_address(struct
> > amdgpu_device *adev,
> >
> >  	/* clear umc status */
> >  	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> > +
> > +	return 0;
> >  }
> >
> >  static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
> >  					     void *ras_error_status)
> >  {
> > -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -	uint32_t umc_reg_offset  = 0;
> > -
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> > -						node_inst,
> > -						umc_inst,
> > -						ch_inst);
> > -
> > -		umc_v8_10_query_error_address(adev,
> > -					err_data,
> > -					umc_reg_offset,
> > -					node_inst,
> > -					ch_inst,
> > -					umc_inst);
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_query_error_address, ras_error_status);
> >  }
> >
> > -static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device
> *adev,
> > -					      uint32_t umc_reg_offset)
> > +static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> >  {
> >  	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
> >  	uint32_t ecc_err_cnt_addr;
> > +	uint32_t umc_reg_offset =
> > +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> >
> >  	ecc_err_cnt_sel_addr =
> >  		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
> @@ -332,23
> > +313,14 @@ static void umc_v8_10_err_cnt_init_per_channel(struct
> > amdgpu_device *adev,
> >  	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
> > ecc_err_cnt_sel);
> >  	/* set error count to initial value */
> >  	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
> > UMC_V8_10_CE_CNT_INIT);
> > +
> > +	return 0;
> >  }
> >
> >  static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)  {
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -	uint32_t umc_reg_offset  = 0;
> > -
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> > -						node_inst,
> > -						umc_inst,
> > -						ch_inst);
> > -
> > -		umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset);
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_err_cnt_init_per_channel, NULL);
> >  }
> >
> >  static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device
> > *adev) @@ -406,37 +378,35 @@ static void
> > umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_dev
> >  	}
> >  }
> >
> > +static int umc_v8_10_ecc_info_query_ecc_error_count(struct
> > +amdgpu_device
> > *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> > +{
> > +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> > +
> > +	umc_v8_10_ecc_info_query_correctable_error_count(adev,
> > +					node_inst, umc_inst, ch_inst,
> > +					&(err_data->ce_count));
> > +	umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> > +					node_inst, umc_inst, ch_inst,
> > +					&(err_data->ue_count));
> > +	return 0;
> > +}
> > +
> >  static void umc_v8_10_ecc_info_query_ras_error_count(struct
> > amdgpu_device *adev,
> >  					void *ras_error_status)
> >  {
> > -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> > -
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -
> > -	/* TODO: driver needs to toggle DF Cstate to ensure
> > -	 * safe access of UMC registers. Will add the protection
> > -	 */
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_v8_10_ecc_info_query_correctable_error_count(adev,
> > -							node_inst, umc_inst,
> > ch_inst,
> > -							&(err_data-
> > >ce_count));
> > -		umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> > -							node_inst, umc_inst,
> > ch_inst,
> > -							&(err_data-
> > >ue_count));
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_ecc_info_query_ecc_error_count,
> > ras_error_status);
> >  }
> >
> > -static void umc_v8_10_ecc_info_query_error_address(struct
> > amdgpu_device *adev,
> > -					struct ras_err_data *err_data,
> > -					uint32_t ch_inst,
> > -					uint32_t umc_inst,
> > -					uint32_t node_inst)
> > +static int umc_v8_10_ecc_info_query_error_address(struct
> > +amdgpu_device
> > *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> >  {
> >  	uint32_t eccinfo_table_idx;
> >  	uint64_t mc_umc_status, err_addr;
> > -
> > +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> >  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> >
> >  	eccinfo_table_idx = node_inst * adev->umc.umc_inst_num * @@ -
> > 447,10 +417,10 @@ static void
> > umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
> >  	mc_umc_status = ras-
> > >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> >
> >  	if (mc_umc_status == 0)
> > -		return;
> > +		return 0;
> >
> >  	if (!err_data->err_addr)
> > -		return;
> > +		return 0;
> >
> >  	/* calculate error address if ue error is detected */
> >  	if (REG_GET_FIELD(mc_umc_status,
> > MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -463,28 +433,15
> @@ static
> > void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device
> > *adev,
> >  		umc_v8_10_convert_error_address(adev, err_data, err_addr,
> >  					ch_inst, umc_inst, node_inst,
> > mc_umc_status);
> >  	}
> > +
> > +	return 0;
> >  }
> >
> >  static void umc_v8_10_ecc_info_query_ras_error_address(struct
> > amdgpu_device *adev,
> >  					void *ras_error_status)
> >  {
> > -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> > -
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -
> > -	/* TODO: driver needs to toggle DF Cstate to ensure
> > -	 * safe access of UMC resgisters. Will add the protection
> > -	 * when firmware interface is ready
> > -	 */
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_v8_10_ecc_info_query_error_address(adev,
> > -						err_data,
> > -						ch_inst,
> > -						umc_inst,
> > -						node_inst);
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_ecc_info_query_error_address, ras_error_status);
> >  }
> >
> >  const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
> > --
> > 2.34.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10
  2023-04-03  7:21     ` Zhou1, Tao
@ 2023-04-03  7:25       ` Chai, Thomas
  0 siblings, 0 replies; 6+ messages in thread
From: Chai, Thomas @ 2023-04-03  7:25 UTC (permalink / raw)
  To: Zhou1, Tao, amd-gfx; +Cc: Li, Candice, Yang, Stanley, Zhang, Hawking

[AMD Official Use Only - General]

OK, Will do.


-----------------
Best Regards,
Thomas

-----Original Message-----
From: Zhou1, Tao <Tao.Zhou1@amd.com> 
Sent: Monday, April 3, 2023 3:21 PM
To: Chai, Thomas <YiPeng.Chai@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Li, Candice <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>
Subject: RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10

[AMD Official Use Only - General]



> -----Original Message-----
> From: Chai, Thomas <YiPeng.Chai@amd.com>
> Sent: Monday, April 3, 2023 3:00 PM
> To: Zhou1, Tao <Tao.Zhou1@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Li, Candice 
> <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>
> Subject: RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in 
> umc_v8_10
> 
> [AMD Official Use Only - General]
> 
> 
> 
> 
> -----------------
> Best Regards,
> Thomas
> 
> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1@amd.com>
> Sent: Monday, April 3, 2023 11:45 AM
> To: Chai, Thomas <YiPeng.Chai@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Li, Candice 
> <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>
> Subject: RE: [PATCH 1/2] drm/amdgpu: optimize redundant code in 
> umc_v8_10
> 
> [AMD Official Use Only - General]
> 
> 
> 
> > -----Original Message-----
> > From: Chai, Thomas <YiPeng.Chai@amd.com>
> > Sent: Monday, April 3, 2023 9:59 AM
> > To: amd-gfx@lists.freedesktop.org
> > Cc: Chai, Thomas <YiPeng.Chai@amd.com>; Zhang, Hawking 
> > <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Li,
> Candice
> > <Candice.Li@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>; Chai, 
> > Thomas <YiPeng.Chai@amd.com>
> > Subject: [PATCH 1/2] drm/amdgpu: optimize redundant code in 
> > umc_v8_10
> >
> > Optimize redundant code in umc_v8_10
> >
> > Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
> > ---
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c |  31 ++++
> >  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h |   7 +
> >  drivers/gpu/drm/amd/amdgpu/umc_v8_10.c  | 197
> > +++++++++---------------
> >  3 files changed, 115 insertions(+), 120 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > index 9e2e97207e53..734442315cf6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> > @@ -302,3 +302,34 @@ void amdgpu_umc_fill_error_record(struct
> > ras_err_data *err_data,
> >
> >  	err_data->err_addr_cnt++;
> >  }
> > +
> > +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> > +			umc_func func, void *data)
> > +{
> > +	uint32_t node_inst       = 0;
> > +	uint32_t umc_inst        = 0;
> > +	uint32_t ch_inst         = 0;
> > +	int ret = 0;
> > +
> > +	if (adev->umc.node_inst_num) {
> > +		LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst,
> > ch_inst) {
> > +			ret = func(adev, node_inst, umc_inst, ch_inst, data);
> > +			if (ret) {
> > +				dev_err(adev->dev, "Node %d umc %d ch %d
> > func returns %d\n",
> > +					node_inst, umc_inst, ch_inst, ret);
> > +				return ret;
> > +			}
> > +		}
> > +	} else {
> > +		LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
> 
> >[Tao] for ASIC which doesn't support node, can we set its 
> >node_inst_num to 1
> and retire the macro LOOP_UMC_INST_AND_CH?
> 
> [Thomas] I am afraid not.
> 
> 	" #define LOOP_UMC_NODE_INST(node_inst) \
> 		for_each_set_bit((node_inst), &(adev->umc.active_mask),
> adev->umc.node_inst_num) "
> 
> 	The node instance loop of LOOP_UMC_EACH_NODE_INST_AND_CH supports 
> node harvest, so node_inst_num is not the real node instance number.

[Tao] we can set both node_inst_num and active_mask to 1, but either way is fine for me.
BTW, I think amdgpu_umc_loop_channels is simpler than amdgpu_umc_scan_all_umc_channels, with this fixed, the series is:

Reviewed-by: Tao Zhou <tao.zhou1@amd.com>

> 
> 
> > +			ret = func(adev, 0, umc_inst, ch_inst, data);
> > +			if (ret) {
> > +				dev_err(adev->dev, "Umc %d ch %d func
> > returns %d\n",
> > +					umc_inst, ch_inst, ret);
> > +				return ret;
> > +			}
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> > index d7f1229ff11f..f279c8057f96 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
> > @@ -47,6 +47,10 @@
> >  #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst,
> ch_inst)
> > \
> >  		LOOP_UMC_NODE_INST((node_inst))
> > LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
> >
> > +
> > +typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
> > +			uint32_t umc_inst, uint32_t ch_inst, void *data);
> > +
> >  struct amdgpu_umc_ras {
> >  	struct amdgpu_ras_block_object ras_block;
> >  	void (*err_cnt_init)(struct amdgpu_device *adev); @@ -104,4 +108,7 
> > @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
> >  		struct amdgpu_iv_entry *entry);
> >  int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
> >  			uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
> > +
> > +int amdgpu_umc_scan_all_umc_channels(struct amdgpu_device *adev,
> > +			umc_func func, void *data);
> >  #endif
> > diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> > b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> > index fb55e8cb9967..6dff313ac04c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
> > @@ -76,10 +76,13 @@ static inline uint32_t 
> > get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
> >  		UMC_8_NODE_DIST * node_inst;
> >  }
> >
> > -static void umc_v8_10_clear_error_count_per_channel(struct
> > amdgpu_device *adev,
> > -					uint32_t umc_reg_offset)
> > +static int umc_v8_10_clear_error_count_per_channel(struct
> > +amdgpu_device
> > *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> >  {
> >  	uint32_t ecc_err_cnt_addr;
> > +	uint32_t umc_reg_offset =
> > +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> >
> >  	ecc_err_cnt_addr =
> >  		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt); @@
> > -87,24 +90,14 @@ static void
> > umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
> >  	/* clear error count */
> >  	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
> >  			UMC_V8_10_CE_CNT_INIT);
> > +
> > +	return 0;
> >  }
> >
> >  static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)  {
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -	uint32_t umc_reg_offset  = 0;
> > -
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> > -						node_inst,
> > -						umc_inst,
> > -						ch_inst);
> > -
> > -		umc_v8_10_clear_error_count_per_channel(adev,
> > -						umc_reg_offset);
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_clear_error_count_per_channel, NULL);
> >  }
> >
> >  static void umc_v8_10_query_correctable_error_count(struct
> > amdgpu_device *adev, @@ -147,29 +140,29 @@ static void 
> > umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev
> >  		*error_count += 1;
> >  }
> >
> > +static int umc_v8_10_query_ecc_error_count(struct amdgpu_device *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> > +{
> > +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> > +	uint32_t umc_reg_offset =
> > +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> > +
> > +	umc_v8_10_query_correctable_error_count(adev,
> > +					umc_reg_offset,
> > +					&(err_data->ce_count));
> > +	umc_v8_10_query_uncorrectable_error_count(adev,
> > +					umc_reg_offset,
> > +					&(err_data->ue_count));
> > +
> > +	return 0;
> > +}
> > +
> >  static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
> >  					   void *ras_error_status)
> >  {
> > -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> > -
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -	uint32_t umc_reg_offset  = 0;
> > -
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> > -						node_inst,
> > -						umc_inst,
> > -						ch_inst);
> > -
> > -		umc_v8_10_query_correctable_error_count(adev,
> > -						umc_reg_offset,
> > -						&(err_data->ce_count));
> > -		umc_v8_10_query_uncorrectable_error_count(adev,
> > -						umc_reg_offset,
> > -						&(err_data->ue_count));
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_query_ecc_error_count, ras_error_status);
> >
> >  	umc_v8_10_clear_error_count(adev);
> >  }
> > @@ -248,28 +241,28 @@ static void
> > umc_v8_10_convert_error_address(struct
> > amdgpu_device *adev,
> >  	}
> >  }
> >
> > -static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
> > -					 struct ras_err_data *err_data,
> > -					 uint32_t umc_reg_offset,
> > -					 uint32_t node_inst,
> > -					 uint32_t ch_inst,
> > -					 uint32_t umc_inst)
> > +static int umc_v8_10_query_error_address(struct amdgpu_device *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> >  {
> >  	uint64_t mc_umc_status_addr;
> >  	uint64_t mc_umc_status, err_addr;
> >  	uint64_t mc_umc_addrt0;
> > +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> > +	uint32_t umc_reg_offset =
> > +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> >
> >  	mc_umc_status_addr =
> >  		SOC15_REG_OFFSET(UMC, 0,
> > regMCA_UMC_UMC0_MCUMC_STATUST0);
> >  	mc_umc_status = RREG64_PCIE((mc_umc_status_addr +
> > umc_reg_offset) * 4);
> >
> >  	if (mc_umc_status == 0)
> > -		return;
> > +		return 0;
> >
> >  	if (!err_data->err_addr) {
> >  		/* clear umc status */
> >  		WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4,
> 0x0ULL);
> > -		return;
> > +		return 0;
> >  	}
> >
> >  	/* calculate error address if ue error is detected */ @@ -287,37
> > +280,25 @@ static void umc_v8_10_query_error_address(struct
> > amdgpu_device *adev,
> >
> >  	/* clear umc status */
> >  	WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> > +
> > +	return 0;
> >  }
> >
> >  static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
> >  					     void *ras_error_status)
> >  {
> > -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -	uint32_t umc_reg_offset  = 0;
> > -
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> > -						node_inst,
> > -						umc_inst,
> > -						ch_inst);
> > -
> > -		umc_v8_10_query_error_address(adev,
> > -					err_data,
> > -					umc_reg_offset,
> > -					node_inst,
> > -					ch_inst,
> > -					umc_inst);
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_query_error_address, ras_error_status);
> >  }
> >
> > -static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device
> *adev,
> > -					      uint32_t umc_reg_offset)
> > +static int umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> >  {
> >  	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
> >  	uint32_t ecc_err_cnt_addr;
> > +	uint32_t umc_reg_offset =
> > +		get_umc_v8_10_reg_offset(adev, node_inst, umc_inst, ch_inst);
> >
> >  	ecc_err_cnt_sel_addr =
> >  		SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
> @@ -332,23
> > +313,14 @@ static void umc_v8_10_err_cnt_init_per_channel(struct
> > amdgpu_device *adev,
> >  	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, 
> > ecc_err_cnt_sel);
> >  	/* set error count to initial value */
> >  	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, 
> > UMC_V8_10_CE_CNT_INIT);
> > +
> > +	return 0;
> >  }
> >
> >  static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)  {
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -	uint32_t umc_reg_offset  = 0;
> > -
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_reg_offset = get_umc_v8_10_reg_offset(adev,
> > -						node_inst,
> > -						umc_inst,
> > -						ch_inst);
> > -
> > -		umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset);
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_err_cnt_init_per_channel, NULL);
> >  }
> >
> >  static bool umc_v8_10_query_ras_poison_mode(struct amdgpu_device
> > *adev) @@ -406,37 +378,35 @@ static void 
> > umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_dev
> >  	}
> >  }
> >
> > +static int umc_v8_10_ecc_info_query_ecc_error_count(struct
> > +amdgpu_device
> > *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> > +{
> > +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> > +
> > +	umc_v8_10_ecc_info_query_correctable_error_count(adev,
> > +					node_inst, umc_inst, ch_inst,
> > +					&(err_data->ce_count));
> > +	umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> > +					node_inst, umc_inst, ch_inst,
> > +					&(err_data->ue_count));
> > +	return 0;
> > +}
> > +
> >  static void umc_v8_10_ecc_info_query_ras_error_count(struct
> > amdgpu_device *adev,
> >  					void *ras_error_status)
> >  {
> > -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> > -
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -
> > -	/* TODO: driver needs to toggle DF Cstate to ensure
> > -	 * safe access of UMC registers. Will add the protection
> > -	 */
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_v8_10_ecc_info_query_correctable_error_count(adev,
> > -							node_inst, umc_inst,
> > ch_inst,
> > -							&(err_data-
> > >ce_count));
> > -		umc_v8_10_ecc_info_query_uncorrectable_error_count(adev,
> > -							node_inst, umc_inst,
> > ch_inst,
> > -							&(err_data-
> > >ue_count));
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_ecc_info_query_ecc_error_count,
> > ras_error_status);
> >  }
> >
> > -static void umc_v8_10_ecc_info_query_error_address(struct
> > amdgpu_device *adev,
> > -					struct ras_err_data *err_data,
> > -					uint32_t ch_inst,
> > -					uint32_t umc_inst,
> > -					uint32_t node_inst)
> > +static int umc_v8_10_ecc_info_query_error_address(struct
> > +amdgpu_device
> > *adev,
> > +					uint32_t node_inst, uint32_t umc_inst,
> > +					uint32_t ch_inst, void *data)
> >  {
> >  	uint32_t eccinfo_table_idx;
> >  	uint64_t mc_umc_status, err_addr;
> > -
> > +	struct ras_err_data *err_data = (struct ras_err_data *)data;
> >  	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> >
> >  	eccinfo_table_idx = node_inst * adev->umc.umc_inst_num * @@ -
> > 447,10 +417,10 @@ static void
> > umc_v8_10_ecc_info_query_error_address(struct amdgpu_device *adev,
> >  	mc_umc_status = ras-
> > >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> >
> >  	if (mc_umc_status == 0)
> > -		return;
> > +		return 0;
> >
> >  	if (!err_data->err_addr)
> > -		return;
> > +		return 0;
> >
> >  	/* calculate error address if ue error is detected */
> >  	if (REG_GET_FIELD(mc_umc_status,
> > MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -463,28 +433,15
> @@ static
> > void umc_v8_10_ecc_info_query_error_address(struct amdgpu_device 
> > *adev,
> >  		umc_v8_10_convert_error_address(adev, err_data, err_addr,
> >  					ch_inst, umc_inst, node_inst,
> > mc_umc_status);
> >  	}
> > +
> > +	return 0;
> >  }
> >
> >  static void umc_v8_10_ecc_info_query_ras_error_address(struct
> > amdgpu_device *adev,
> >  					void *ras_error_status)
> >  {
> > -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
> > -
> > -	uint32_t node_inst       = 0;
> > -	uint32_t umc_inst        = 0;
> > -	uint32_t ch_inst         = 0;
> > -
> > -	/* TODO: driver needs to toggle DF Cstate to ensure
> > -	 * safe access of UMC resgisters. Will add the protection
> > -	 * when firmware interface is ready
> > -	 */
> > -	LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst)
> > {
> > -		umc_v8_10_ecc_info_query_error_address(adev,
> > -						err_data,
> > -						ch_inst,
> > -						umc_inst,
> > -						node_inst);
> > -	}
> > +	amdgpu_umc_scan_all_umc_channels(adev,
> > +		umc_v8_10_ecc_info_query_error_address, ras_error_status);
> >  }
> >
> >  const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
> > --
> > 2.34.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-04-03  7:25 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-03  1:59 [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10 YiPeng Chai
2023-04-03  1:59 ` [PATCH 2/2] drm/amdgpu: optimize redundant code in umc_v6_7 YiPeng Chai
2023-04-03  3:44 ` [PATCH 1/2] drm/amdgpu: optimize redundant code in umc_v8_10 Zhou1, Tao
2023-04-03  7:00   ` Chai, Thomas
2023-04-03  7:21     ` Zhou1, Tao
2023-04-03  7:25       ` Chai, Thomas

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.