* [PATCH] drm/amdgpu: add umc_convert_error_address to simplify code
@ 2022-01-26 11:04 Tao Zhou
2022-01-28 3:33 ` Zhou1, Tao
0 siblings, 1 reply; 2+ messages in thread
From: Tao Zhou @ 2022-01-26 11:04 UTC (permalink / raw)
To: amd-gfx, hawking.zhang, stanley.yang, yipeng.chai, john.clements,
Lijo.Lazar
Cc: Tao Zhou
Make the code reusable and simpler.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 94 +++++++++------------------
drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 82 +++++++++--------------
2 files changed, 61 insertions(+), 115 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 47452b61b615..4abcdda42ac6 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -114,21 +114,13 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
}
}
-static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
- uint32_t ch_inst,
- uint32_t umc_inst)
+static void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint32_t ch_inst,
+ uint32_t umc_inst, uint64_t err_addr,
+ uint64_t mc_umc_status)
{
- uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column;
uint32_t channel_index;
- uint32_t eccinfo_table_idx;
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
- eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
- mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ uint64_t soc_pa, retired_page, column;
if (mc_umc_status == 0)
return;
@@ -136,12 +128,13 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
if (!err_data->err_addr)
return;
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
/* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
- err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
/* translate umc channel address to soc pa, 3 parts are included */
@@ -173,6 +166,23 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
}
}
+static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint64_t mc_umc_status, err_addr;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+
+ umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
+ err_addr, mc_umc_status);
+}
+
static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
void *ras_error_status)
{
@@ -348,9 +358,7 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
uint32_t umc_inst)
{
uint32_t mc_umc_status_addr;
- uint32_t channel_index;
- uint64_t mc_umc_status, mc_umc_addrt0;
- uint64_t err_addr, soc_pa, retired_page, column;
+ uint64_t mc_umc_status, mc_umc_addrt0, err_addr;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -358,54 +366,10 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
- if (mc_umc_status == 0)
- return;
-
- if (!err_data->err_addr) {
- /* clear umc status */
- WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
- return;
- }
-
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
- /* calculate error address if ue/ce error is detected */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
- err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
- err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
-
- /* translate umc channel address to soc pa, 3 parts are included */
- soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- /* The umc channel bits are not original values, they are hashed */
- SET_CHANNEL_HASH(channel_index, soc_pa);
-
- /* clear [C4 C3 C2] in soc physical address */
- soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1) {
- /* loop for all possibilities of [C4 C3 C2] */
- for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) {
- retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
-
- /* shift R14 bit */
- retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
- }
- }
- }
+ umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
+ err_addr, mc_umc_status);
/* clear umc status */
WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
index de85a998ef99..df15b87ae12b 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
@@ -115,21 +115,13 @@ static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
}
}
-static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
- struct ras_err_data *err_data,
- uint32_t ch_inst,
- uint32_t umc_inst)
+static void umc_v8_7_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data, uint32_t ch_inst,
+ uint32_t umc_inst, uint64_t err_addr,
+ uint64_t mc_umc_status)
{
- uint64_t mc_umc_status, err_addr, retired_page;
- uint32_t channel_index;
- uint32_t eccinfo_table_idx;
- struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
-
- eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
- channel_index =
- adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
-
- mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ uint32_t lsb, channel_index;
+ uint64_t retired_page;
if (mc_umc_status == 0)
return;
@@ -137,13 +129,16 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
if (!err_data->err_addr)
return;
+ channel_index =
+ adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+
/* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
- err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+ lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+ err_addr &= ~((0x1ULL << lsb) - 1);
/* translate umc channel address to soc pa, 3 parts are included */
retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
@@ -157,6 +152,22 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
retired_page, channel_index, umc_inst);
}
}
+static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ uint32_t ch_inst,
+ uint32_t umc_inst)
+{
+ uint64_t mc_umc_status, err_addr;
+ uint32_t eccinfo_table_idx;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
+ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
+ err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
+
+ umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
+ err_addr, mc_umc_status);
+}
static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
void *ras_error_status)
@@ -330,9 +341,8 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
uint32_t ch_inst,
uint32_t umc_inst)
{
- uint32_t lsb, mc_umc_status_addr;
- uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
- uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
+ uint32_t mc_umc_status_addr;
+ uint64_t mc_umc_status, err_addr, mc_umc_addrt0;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -340,38 +350,10 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0);
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
+ err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
- if (mc_umc_status == 0)
- return;
-
- if (!err_data->err_addr) {
- /* clear umc status */
- WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
- return;
- }
-
- /* calculate error address if ue/ce error is detected */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
-
- err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
- /* the lowest lsb bits should be ignored */
- lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
- err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
- err_addr &= ~((0x1ULL << lsb) - 1);
-
- /* translate umc channel address to soc pa, 3 parts are included */
- retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(err_addr);
-
- /* we only save ue error information currently, ce is skipped */
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
- == 1)
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
- }
+ umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
+ err_addr, mc_umc_status);
/* clear umc status */
WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
--
2.17.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* RE: [PATCH] drm/amdgpu: add umc_convert_error_address to simplify code
2022-01-26 11:04 [PATCH] drm/amdgpu: add umc_convert_error_address to simplify code Tao Zhou
@ 2022-01-28 3:33 ` Zhou1, Tao
0 siblings, 0 replies; 2+ messages in thread
From: Zhou1, Tao @ 2022-01-28 3:33 UTC (permalink / raw)
To: amd-gfx, Zhang, Hawking, Yang, Stanley, Chai, Thomas, Clements,
John, Lazar, Lijo
[AMD Official Use Only]
Ping...
> -----Original Message-----
> From: Zhou1, Tao <Tao.Zhou1@amd.com>
> Sent: Wednesday, January 26, 2022 7:05 PM
> To: amd-gfx@lists.freedesktop.org; Zhang, Hawking
> <Hawking.Zhang@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>; Chai,
> Thomas <YiPeng.Chai@amd.com>; Clements, John <John.Clements@amd.com>;
> Lazar, Lijo <Lijo.Lazar@amd.com>
> Cc: Zhou1, Tao <Tao.Zhou1@amd.com>
> Subject: [PATCH] drm/amdgpu: add umc_convert_error_address to simplify
> code
>
> Make code reusable and more simple.
>
> Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 94 +++++++++------------------
> drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 82 +++++++++--------------
> 2 files changed, 61 insertions(+), 115 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> index 47452b61b615..4abcdda42ac6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
> @@ -114,21 +114,13 @@ static void
> umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
> }
> }
>
> -static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> - struct ras_err_data *err_data,
> - uint32_t ch_inst,
> - uint32_t umc_inst)
> +static void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
> + struct ras_err_data *err_data, uint32_t
> ch_inst,
> + uint32_t umc_inst, uint64_t err_addr,
> + uint64_t mc_umc_status)
> {
> - uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column;
> uint32_t channel_index;
> - uint32_t eccinfo_table_idx;
> - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> -
> - eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
> - channel_index =
> - adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num + ch_inst];
> -
> - mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> + uint64_t soc_pa, retired_page, column;
>
> if (mc_umc_status == 0)
> return;
> @@ -136,12 +128,13 @@ static void
> umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
> if (!err_data->err_addr)
> return;
>
> + channel_index =
> + adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num +
> +ch_inst];
> +
> /* calculate error address if ue/ce error is detected */
> if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
> (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
> REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
> -
> - err_addr = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
> err_addr = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
>
> /* translate umc channel address to soc pa, 3 parts are included
> */ @@ -173,6 +166,23 @@ static void
> umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
> }
> }
>
> +static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> + struct ras_err_data *err_data,
> + uint32_t ch_inst,
> + uint32_t umc_inst)
> +{
> + uint64_t mc_umc_status, err_addr;
> + uint32_t eccinfo_table_idx;
> + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
> + mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> + err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
> +
> + umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
> + err_addr, mc_umc_status);
> +}
> +
> static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device
> *adev,
> void *ras_error_status)
> {
> @@ -348,9 +358,7 @@ static void umc_v6_7_query_error_address(struct
> amdgpu_device *adev,
> uint32_t umc_inst)
> {
> uint32_t mc_umc_status_addr;
> - uint32_t channel_index;
> - uint64_t mc_umc_status, mc_umc_addrt0;
> - uint64_t err_addr, soc_pa, retired_page, column;
> + uint64_t mc_umc_status, mc_umc_addrt0, err_addr;
>
> mc_umc_status_addr =
> SOC15_REG_OFFSET(UMC, 0,
> regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -358,54 +366,10 @@ static
> void umc_v6_7_query_error_address(struct amdgpu_device *adev,
> SOC15_REG_OFFSET(UMC, 0,
> regMCA_UMC_UMC0_MCUMC_ADDRT0);
>
> mc_umc_status = RREG64_PCIE((mc_umc_status_addr +
> umc_reg_offset) * 4);
> + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
>
> - if (mc_umc_status == 0)
> - return;
> -
> - if (!err_data->err_addr) {
> - /* clear umc status */
> - WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4,
> 0x0ULL);
> - return;
> - }
> -
> - channel_index =
> - adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num + ch_inst];
> -
> - /* calculate error address if ue/ce error is detected */
> - if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
> - (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
> - REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
> -
> - err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) *
> 4);
> - err_addr = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
> -
> - /* translate umc channel address to soc pa, 3 parts are included
> */
> - soc_pa = ADDR_OF_8KB_BLOCK(err_addr) |
> - ADDR_OF_256B_BLOCK(channel_index) |
> - OFFSET_IN_256B_BLOCK(err_addr);
> -
> - /* The umc channel bits are not original values, they are hashed
> */
> - SET_CHANNEL_HASH(channel_index, soc_pa);
> -
> - /* clear [C4 C3 C2] in soc physical address */
> - soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT);
> -
> - /* we only save ue error information currently, ce is skipped */
> - if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
> - == 1) {
> - /* loop for all possibilities of [C4 C3 C2] */
> - for (column = 0; column <
> UMC_V6_7_NA_MAP_PA_NUM; column++) {
> - retired_page = soc_pa | (column <<
> UMC_V6_7_PA_C2_BIT);
> - amdgpu_umc_fill_error_record(err_data,
> err_addr,
> - retired_page, channel_index, umc_inst);
> -
> - /* shift R14 bit */
> - retired_page ^= (0x1ULL <<
> UMC_V6_7_PA_R14_BIT);
> - amdgpu_umc_fill_error_record(err_data,
> err_addr,
> - retired_page, channel_index, umc_inst);
> - }
> - }
> - }
> + umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
> + err_addr, mc_umc_status);
>
> /* clear umc status */
> WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> index de85a998ef99..df15b87ae12b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
> @@ -115,21 +115,13 @@ static void
> umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
> }
> }
>
> -static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> - struct ras_err_data *err_data,
> - uint32_t ch_inst,
> - uint32_t umc_inst)
> +static void umc_v8_7_convert_error_address(struct amdgpu_device *adev,
> + struct ras_err_data *err_data, uint32_t
> ch_inst,
> + uint32_t umc_inst, uint64_t err_addr,
> + uint64_t mc_umc_status)
> {
> - uint64_t mc_umc_status, err_addr, retired_page;
> - uint32_t channel_index;
> - uint32_t eccinfo_table_idx;
> - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> -
> - eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
> - channel_index =
> - adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num + ch_inst];
> -
> - mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> + uint32_t lsb, channel_index;
> + uint64_t retired_page;
>
> if (mc_umc_status == 0)
> return;
> @@ -137,13 +129,16 @@ static void
> umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
> if (!err_data->err_addr)
> return;
>
> + channel_index =
> + adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num +
> +ch_inst];
> +
> /* calculate error address if ue/ce error is detected */
> if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
> (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
> REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
> -
> - err_addr = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
> + lsb = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
> err_addr = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
> + err_addr &= ~((0x1ULL << lsb) - 1);
>
> /* translate umc channel address to soc pa, 3 parts are included
> */
> retired_page = ADDR_OF_4KB_BLOCK(err_addr) | @@ -157,6
> +152,22 @@ static void umc_v8_7_ecc_info_query_error_address(struct
> amdgpu_device *adev,
> retired_page, channel_index, umc_inst);
> }
> }
> +static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device
> *adev,
> + struct ras_err_data *err_data,
> + uint32_t ch_inst,
> + uint32_t umc_inst)
> +{
> + uint64_t mc_umc_status, err_addr;
> + uint32_t eccinfo_table_idx;
> + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
> +
> + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
> + mc_umc_status = ras-
> >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
> + err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr;
> +
> + umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
> + err_addr, mc_umc_status);
> +}
>
> static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device
> *adev,
> void *ras_error_status)
> @@ -330,9 +341,8 @@ static void umc_v8_7_query_error_address(struct
> amdgpu_device *adev,
> uint32_t ch_inst,
> uint32_t umc_inst)
> {
> - uint32_t lsb, mc_umc_status_addr;
> - uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
> - uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev-
> >umc.channel_inst_num + ch_inst];
> + uint32_t mc_umc_status_addr;
> + uint64_t mc_umc_status, err_addr, mc_umc_addrt0;
>
> mc_umc_status_addr =
> SOC15_REG_OFFSET(UMC, 0,
> mmMCA_UMC_UMC0_MCUMC_STATUST0); @@ -340,38 +350,10 @@ static
> void umc_v8_7_query_error_address(struct amdgpu_device *adev,
> SOC15_REG_OFFSET(UMC, 0,
> mmMCA_UMC_UMC0_MCUMC_ADDRT0);
>
> mc_umc_status = RREG64_PCIE((mc_umc_status_addr +
> umc_reg_offset) * 4);
> + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
>
> - if (mc_umc_status == 0)
> - return;
> -
> - if (!err_data->err_addr) {
> - /* clear umc status */
> - WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4,
> 0x0ULL);
> - return;
> - }
> -
> - /* calculate error address if ue/ce error is detected */
> - if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
> - (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
> - REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
> -
> - err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) *
> 4);
> - /* the lowest lsb bits should be ignored */
> - lsb = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
> - err_addr = REG_GET_FIELD(err_addr,
> MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
> - err_addr &= ~((0x1ULL << lsb) - 1);
> -
> - /* translate umc channel address to soc pa, 3 parts are included
> */
> - retired_page = ADDR_OF_4KB_BLOCK(err_addr) |
> - ADDR_OF_256B_BLOCK(channel_index) |
> - OFFSET_IN_256B_BLOCK(err_addr);
> -
> - /* we only save ue error information currently, ce is skipped */
> - if (REG_GET_FIELD(mc_umc_status,
> MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
> - == 1)
> - amdgpu_umc_fill_error_record(err_data, err_addr,
> - retired_page, channel_index, umc_inst);
> - }
> + umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst,
> + err_addr, mc_umc_status);
>
> /* clear umc status */
> WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
> --
> 2.17.1
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-01-28 3:33 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-26 11:04 [PATCH] drm/amdgpu: add umc_convert_error_address to simplify code Tao Zhou
2022-01-28 3:33 ` Zhou1, Tao
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.