* [PATCH 1/7] drm/amdgpu: add hdp ras structures
@ 2021-04-29 6:25 Hawking Zhang
2021-04-29 6:25 ` [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini Hawking Zhang
` (6 more replies)
0 siblings, 7 replies; 15+ messages in thread
From: Hawking Zhang @ 2021-04-29 6:25 UTC (permalink / raw)
To: Alex Deucher, Dennis Li, John Clements, amd-gfx; +Cc: Hawking Zhang
centralize all hdp ras operation to ras_funcs
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 43caf9f..c89cf8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -23,6 +23,14 @@
#ifndef __AMDGPU_HDP_H__
#define __AMDGPU_HDP_H__
+struct amdgpu_hdp_ras_funcs {
+ int (*ras_late_init)(struct amdgpu_device *adev);
+ void (*ras_fini)(struct amdgpu_device *adev);
+ void (*query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ void (*reset_ras_error_count)(struct amdgpu_device *adev);
+};
+
struct amdgpu_hdp_funcs {
void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void (*invalidate_hdp)(struct amdgpu_device *adev,
@@ -34,7 +42,9 @@ struct amdgpu_hdp_funcs {
};
struct amdgpu_hdp {
+ struct ras_common_if *ras_if;
const struct amdgpu_hdp_funcs *funcs;
+ const struct amdgpu_hdp_ras_funcs *ras_funcs;
};
#endif /* __AMDGPU_HDP_H__ */
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini
2021-04-29 6:25 [PATCH 1/7] drm/amdgpu: add hdp ras structures Hawking Zhang
@ 2021-04-29 6:25 ` Hawking Zhang
2021-04-29 8:22 ` Li, Dennis
2021-04-29 6:25 ` [PATCH 3/7] drm/amdgpu: implement hdp v4_0 ras functions Hawking Zhang
` (5 subsequent siblings)
6 siblings, 1 reply; 15+ messages in thread
From: Hawking Zhang @ 2021-04-29 6:25 UTC (permalink / raw)
To: Alex Deucher, Dennis Li, John Clements, amd-gfx; +Cc: Hawking Zhang
hdp ras init/fini are common functions that
can be shared among hdp generations
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 69 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 2 +
3 files changed, 72 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index ee85e8a..418e674 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -56,7 +56,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
- amdgpu_fw_attestation.o amdgpu_securedisplay.o
+ amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
new file mode 100644
index 0000000..1d50d53
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "hdp_err_count",
+ };
+
+ if (!adev->hdp.ras_if) {
+ adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->hdp.ras_if)
+ return -ENOMEM;
+ adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
+ adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->hdp.ras_if->sub_block_index = 0;
+ strcpy(adev->hdp.ras_if->name, "hdp");
+ }
+ ih_info.head = fs_info.head = *adev->hdp.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
+ kfree(adev->hdp.ras_if);
+ adev->hdp.ras_if = NULL;
+ }
+
+ return r;
+}
+
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
+ adev->hdp.ras_if) {
+ struct ras_common_if *ras_if = adev->hdp.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index c89cf8d..ba6f272 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -47,4 +47,6 @@ struct amdgpu_hdp {
const struct amdgpu_hdp_ras_funcs *ras_funcs;
};
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
#endif /* __AMDGPU_HDP_H__ */
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 3/7] drm/amdgpu: implement hdp v4_0 ras functions
2021-04-29 6:25 [PATCH 1/7] drm/amdgpu: add hdp ras structures Hawking Zhang
2021-04-29 6:25 ` [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini Hawking Zhang
@ 2021-04-29 6:25 ` Hawking Zhang
2021-04-29 8:28 ` Li, Dennis
2021-04-29 6:25 ` [PATCH 4/7] drm/amdgpu: initialize " Hawking Zhang
` (4 subsequent siblings)
6 siblings, 1 reply; 15+ messages in thread
From: Hawking Zhang @ 2021-04-29 6:25 UTC (permalink / raw)
To: Alex Deucher, Dennis Li, John Clements, amd-gfx; +Cc: Hawking Zhang
implement hdp v4_0 ras functions, including
ras init/fini, query/reset_error_counter
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 30 ++++++++++++++++++++++++++++--
drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h | 1 +
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index edbd35d..330c0f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -59,12 +59,31 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
}
+static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
+ return;
+
+ /* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */
+ err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+};
+
static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
{
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
return;
- /*read back hdp ras counter to reset it to 0 */
- RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+
+ if (adev->asic_type >= CHIP_ALDEBARAN)
+ WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
+ else
+ /*read back hdp ras counter to reset it to 0 */
+ RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
}
static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
@@ -130,6 +149,13 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
}
+const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
+ .ras_late_init = amdgpu_hdp_ras_late_init,
+ .ras_fini = amdgpu_hdp_ras_fini,
+ .query_ras_error_count = hdp_v4_0_query_ras_error_count,
+ .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
+};
+
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
.flush_hdp = hdp_v4_0_flush_hdp,
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
index d1e6399..dc3a1b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
@@ -27,5 +27,6 @@
#include "soc15_common.h"
extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
+extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs;
#endif
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 4/7] drm/amdgpu: initialize hdp v4_0 ras functions
2021-04-29 6:25 [PATCH 1/7] drm/amdgpu: add hdp ras structures Hawking Zhang
2021-04-29 6:25 ` [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini Hawking Zhang
2021-04-29 6:25 ` [PATCH 3/7] drm/amdgpu: implement hdp v4_0 ras functions Hawking Zhang
@ 2021-04-29 6:25 ` Hawking Zhang
2021-04-29 8:30 ` Li, Dennis
2021-04-29 6:25 ` [PATCH 5/7] drm/amdgpu: init/fini hdp v4_0 ras Hawking Zhang
` (3 subsequent siblings)
6 siblings, 1 reply; 15+ messages in thread
From: Hawking Zhang @ 2021-04-29 6:25 UTC (permalink / raw)
To: Alex Deucher, Dennis Li, John Clements, amd-gfx; +Cc: Hawking Zhang
hdp v4_0 support ras features
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 4da8b3d..8e0cab5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -53,6 +53,7 @@
#include "mmhub_v1_7.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"
+#include "hdp_v4_0.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
@@ -1210,6 +1211,11 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}
+static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
+}
+
static int gmc_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1230,6 +1236,7 @@ static int gmc_v9_0_early_init(void *handle)
gmc_v9_0_set_mmhub_funcs(adev);
gmc_v9_0_set_mmhub_ras_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
+ gmc_v9_0_set_hdp_ras_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 5/7] drm/amdgpu: init/fini hdp v4_0 ras
2021-04-29 6:25 [PATCH 1/7] drm/amdgpu: add hdp ras structures Hawking Zhang
` (2 preceding siblings ...)
2021-04-29 6:25 ` [PATCH 4/7] drm/amdgpu: initialize " Hawking Zhang
@ 2021-04-29 6:25 ` Hawking Zhang
2021-04-29 8:31 ` Li, Dennis
2021-04-29 6:25 ` [PATCH 6/7] drm/amdgpu: enable ras error count query and reset for HDP Hawking Zhang
` (2 subsequent siblings)
6 siblings, 1 reply; 15+ messages in thread
From: Hawking Zhang @ 2021-04-29 6:25 UTC (permalink / raw)
To: Alex Deucher, Dennis Li, John Clements, amd-gfx; +Cc: Hawking Zhang
invoke hdp v4_0 ras init in gmc late_init phase
while ras fini in gmc sw_fini phase
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index dfa67c2..697ab26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -455,6 +455,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->ras_late_init) {
+ r = adev->hdp.ras_funcs->ras_late_init(adev);
+ if (r)
+ return r;
+ }
+
return 0;
}
@@ -471,6 +478,10 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->ras_fini)
+ adev->hdp.ras_funcs->ras_fini(adev);
}
/*
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 6/7] drm/amdgpu: enable ras error count query and reset for HDP
2021-04-29 6:25 [PATCH 1/7] drm/amdgpu: add hdp ras structures Hawking Zhang
` (3 preceding siblings ...)
2021-04-29 6:25 ` [PATCH 5/7] drm/amdgpu: init/fini hdp v4_0 ras Hawking Zhang
@ 2021-04-29 6:25 ` Hawking Zhang
2021-04-29 8:32 ` Li, Dennis
2021-04-29 6:25 ` [PATCH 7/7] drm/amdgpu: retired reset_ras_error_count from hdp callbacks Hawking Zhang
2021-04-29 7:50 ` [PATCH 1/7] drm/amdgpu: add hdp ras structures Clements, John
6 siblings, 1 reply; 15+ messages in thread
From: Hawking Zhang @ 2021-04-29 6:25 UTC (permalink / raw)
To: Alex Deucher, Dennis Li, John Clements, amd-gfx; +Cc: Hawking Zhang
add hdp block ras error query and reset support in
amdgpu ras error count query and reset interface
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 ++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++++
drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ---
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ae9fb20..984e827 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -890,6 +890,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
adev->gmc.xgmi.ras_funcs->query_ras_error_count)
adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
break;
+ case AMDGPU_RAS_BLOCK__HDP:
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->query_ras_error_count)
+ adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
+ break;
default:
break;
}
@@ -967,6 +972,11 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
if (adev->sdma.funcs->reset_ras_error_count)
adev->sdma.funcs->reset_ras_error_count(adev);
break;
+ case AMDGPU_RAS_BLOCK__HDP:
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->reset_ras_error_count)
+ adev->hdp.ras_funcs->reset_ras_error_count(adev);
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 8e0cab5..3daf806 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1276,6 +1276,10 @@ static int gmc_v9_0_late_init(void *handle)
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->reset_ras_error_count)
+ adev->hdp.ras_funcs->reset_ras_error_count(adev);
+
r = amdgpu_gmc_ras_late_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index d80e12b..28e9f6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1521,9 +1521,6 @@ static int soc15_common_late_init(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
- if (adev->hdp.funcs->reset_ras_error_count)
- adev->hdp.funcs->reset_ras_error_count(adev);
-
if (adev->nbio.ras_funcs &&
adev->nbio.ras_funcs->ras_late_init)
r = adev->nbio.ras_funcs->ras_late_init(adev);
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 7/7] drm/amdgpu: retired reset_ras_error_count from hdp callbacks
2021-04-29 6:25 [PATCH 1/7] drm/amdgpu: add hdp ras structures Hawking Zhang
` (4 preceding siblings ...)
2021-04-29 6:25 ` [PATCH 6/7] drm/amdgpu: enable ras error count query and reset for HDP Hawking Zhang
@ 2021-04-29 6:25 ` Hawking Zhang
2021-04-29 8:31 ` Li, Dennis
2021-04-29 7:50 ` [PATCH 1/7] drm/amdgpu: add hdp ras structures Clements, John
6 siblings, 1 reply; 15+ messages in thread
From: Hawking Zhang @ 2021-04-29 6:25 UTC (permalink / raw)
To: Alex Deucher, Dennis Li, John Clements, amd-gfx; +Cc: Hawking Zhang
It was moved to hdp ras callbacks
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 1 -
drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 1 -
2 files changed, 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index ba6f272..7ec99d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -35,7 +35,6 @@ struct amdgpu_hdp_funcs {
void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
void (*init_registers)(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 330c0f0..74b90cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -159,7 +159,6 @@ const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
.flush_hdp = hdp_v4_0_flush_hdp,
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
- .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
.update_clock_gating = hdp_v4_0_update_clock_gating,
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
.init_registers = hdp_v4_0_init_registers,
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH 1/7] drm/amdgpu: add hdp ras structures
2021-04-29 6:25 [PATCH 1/7] drm/amdgpu: add hdp ras structures Hawking Zhang
` (5 preceding siblings ...)
2021-04-29 6:25 ` [PATCH 7/7] drm/amdgpu: retired reset_ras_error_count from hdp callbacks Hawking Zhang
@ 2021-04-29 7:50 ` Clements, John
6 siblings, 0 replies; 15+ messages in thread
From: Clements, John @ 2021-04-29 7:50 UTC (permalink / raw)
To: Zhang, Hawking, Deucher, Alexander, Li, Dennis, amd-gfx; +Cc: Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]
Series is:
Reviewed-by: John Clements <John.Clements@amd.com>
-----Original Message-----
From: Hawking Zhang <Hawking.Zhang@amd.com>
Sent: Thursday, April 29, 2021 2:26 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Li, Dennis <Dennis.Li@amd.com>; Clements, John <John.Clements@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: [PATCH 1/7] drm/amdgpu: add hdp ras structures
centralize all hdp ras operation to ras_funcs
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 43caf9f..c89cf8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -23,6 +23,14 @@
#ifndef __AMDGPU_HDP_H__
#define __AMDGPU_HDP_H__
+struct amdgpu_hdp_ras_funcs {
+ int (*ras_late_init)(struct amdgpu_device *adev);
+ void (*ras_fini)(struct amdgpu_device *adev);
+ void (*query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ void (*reset_ras_error_count)(struct amdgpu_device *adev); };
+
struct amdgpu_hdp_funcs {
void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void (*invalidate_hdp)(struct amdgpu_device *adev, @@ -34,7 +42,9 @@ struct amdgpu_hdp_funcs { };
struct amdgpu_hdp {
+ struct ras_common_if *ras_if;
const struct amdgpu_hdp_funcs *funcs;
+ const struct amdgpu_hdp_ras_funcs *ras_funcs;
};
#endif /* __AMDGPU_HDP_H__ */
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini
2021-04-29 6:25 ` [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini Hawking Zhang
@ 2021-04-29 8:22 ` Li, Dennis
2021-04-29 12:24 ` Zhang, Hawking
0 siblings, 1 reply; 15+ messages in thread
From: Li, Dennis @ 2021-04-29 8:22 UTC (permalink / raw)
To: Zhang, Hawking, Deucher, Alexander, Clements, John, amd-gfx
Cc: Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]
>>+ r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
>>+ &fs_info, &ih_info);
>>+ if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
>>+ kfree(adev->hdp.ras_if);
>>+ adev->hdp.ras_if = NULL;
>>+ }
It is better to move amdgpu_ras_is_supported more early, to avoid redundant memory allocation when HDP doesn't support RAS. Except this, it looks good to me.
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
-----Original Message-----
From: Hawking Zhang <Hawking.Zhang@amd.com>
Sent: Thursday, April 29, 2021 2:26 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Li, Dennis <Dennis.Li@amd.com>; Clements, John <John.Clements@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini
hdp ras init/fini are common functions that can be shared among hdp generations
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 69 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 2 +
3 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index ee85e8a..418e674 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -56,7 +56,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
- amdgpu_fw_attestation.o amdgpu_securedisplay.o
+ amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
new file mode 100644
index 0000000..1d50d53
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+obtaining a
+ * copy of this software and associated documentation files (the
+"Software"),
+ * to deal in the Software without restriction, including without
+limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
+sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom
+the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
+DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev) {
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "hdp_err_count",
+ };
+
+ if (!adev->hdp.ras_if) {
+ adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->hdp.ras_if)
+ return -ENOMEM;
+ adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
+ adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->hdp.ras_if->sub_block_index = 0;
+ strcpy(adev->hdp.ras_if->name, "hdp");
+ }
+ ih_info.head = fs_info.head = *adev->hdp.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
+ kfree(adev->hdp.ras_if);
+ adev->hdp.ras_if = NULL;
+ }
It is better to move amdgpu_ras_is_supported more early, to avoid redundant memory allocation when HDP doesn't support RAS.
+
+ return r;
+}
+
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev) {
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
+ adev->hdp.ras_if) {
+ struct ras_common_if *ras_if = adev->hdp.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index c89cf8d..ba6f272 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -47,4 +47,6 @@ struct amdgpu_hdp {
const struct amdgpu_hdp_ras_funcs *ras_funcs;
};
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev); void
+amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
#endif /* __AMDGPU_HDP_H__ */
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH 3/7] drm/amdgpu: implement hdp v4_0 ras functions
2021-04-29 6:25 ` [PATCH 3/7] drm/amdgpu: implement hdp v4_0 ras functions Hawking Zhang
@ 2021-04-29 8:28 ` Li, Dennis
0 siblings, 0 replies; 15+ messages in thread
From: Li, Dennis @ 2021-04-29 8:28 UTC (permalink / raw)
To: Zhang, Hawking, Deucher, Alexander, Clements, John, amd-gfx
Cc: Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]
This patch looks good to me.
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
-----Original Message-----
From: Hawking Zhang <Hawking.Zhang@amd.com>
Sent: Thursday, April 29, 2021 2:26 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Li, Dennis <Dennis.Li@amd.com>; Clements, John <John.Clements@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: [PATCH 3/7] drm/amdgpu: implement hdp v4_0 ras functions
implement hdp v4_0 ras functions, including ras init/fini, query/reset_error_counter
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 30 ++++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h | 1 +
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index edbd35d..330c0f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -59,12 +59,31 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); }
+static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data
+*)ras_error_status;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
+ return;
+
+ /* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */
+ err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); };
+
static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev) {
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
return;
- /*read back hdp ras counter to reset it to 0 */
- RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+
+ if (adev->asic_type >= CHIP_ALDEBARAN)
+ WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
+ else
+ /*read back hdp ras counter to reset it to 0 */
+ RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
}
static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev, @@ -130,6 +149,13 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); }
+const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
+ .ras_late_init = amdgpu_hdp_ras_late_init,
+ .ras_fini = amdgpu_hdp_ras_fini,
+ .query_ras_error_count = hdp_v4_0_query_ras_error_count,
+ .reset_ras_error_count = hdp_v4_0_reset_ras_error_count, };
+
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
.flush_hdp = hdp_v4_0_flush_hdp,
.invalidate_hdp = hdp_v4_0_invalidate_hdp, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
index d1e6399..dc3a1b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
@@ -27,5 +27,6 @@
#include "soc15_common.h"
extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
+extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs;
#endif
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH 4/7] drm/amdgpu: initialize hdp v4_0 ras functions
2021-04-29 6:25 ` [PATCH 4/7] drm/amdgpu: initialize " Hawking Zhang
@ 2021-04-29 8:30 ` Li, Dennis
0 siblings, 0 replies; 15+ messages in thread
From: Li, Dennis @ 2021-04-29 8:30 UTC (permalink / raw)
To: Zhang, Hawking, Deucher, Alexander, Clements, John, amd-gfx
Cc: Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]
This patch looks good to me.
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
-----Original Message-----
From: Hawking Zhang <Hawking.Zhang@amd.com>
Sent: Thursday, April 29, 2021 2:26 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Li, Dennis <Dennis.Li@amd.com>; Clements, John <John.Clements@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: [PATCH 4/7] drm/amdgpu: initialize hdp v4_0 ras functions
hdp v4_0 support ras features
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 4da8b3d..8e0cab5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -53,6 +53,7 @@
#include "mmhub_v1_7.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"
+#include "hdp_v4_0.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
@@ -1210,6 +1211,11 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
adev->gfxhub.funcs = &gfxhub_v1_0_funcs; }
+static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev) {
+ adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs; }
+
static int gmc_v9_0_early_init(void *handle) {
struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1230,6 +1236,7 @@ static int gmc_v9_0_early_init(void *handle)
gmc_v9_0_set_mmhub_funcs(adev);
gmc_v9_0_set_mmhub_ras_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
+ gmc_v9_0_set_hdp_ras_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH 5/7] drm/amdgpu: init/fini hdp v4_0 ras
2021-04-29 6:25 ` [PATCH 5/7] drm/amdgpu: init/fini hdp v4_0 ras Hawking Zhang
@ 2021-04-29 8:31 ` Li, Dennis
0 siblings, 0 replies; 15+ messages in thread
From: Li, Dennis @ 2021-04-29 8:31 UTC (permalink / raw)
To: Zhang, Hawking, Deucher, Alexander, Clements, John, amd-gfx
Cc: Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]
This patch looks good to me.
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
-----Original Message-----
From: Hawking Zhang <Hawking.Zhang@amd.com>
Sent: Thursday, April 29, 2021 2:26 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Li, Dennis <Dennis.Li@amd.com>; Clements, John <John.Clements@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: [PATCH 5/7] drm/amdgpu: init/fini hdp v4_0 ras
invoke hdp v4_0 ras init in gmc late_init phase while ras fini in gmc sw_fini phase
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index dfa67c2..697ab26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -455,6 +455,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->ras_late_init) {
+ r = adev->hdp.ras_funcs->ras_late_init(adev);
+ if (r)
+ return r;
+ }
+
return 0;
}
@@ -471,6 +478,10 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->ras_fini)
+ adev->hdp.ras_funcs->ras_fini(adev);
}
/*
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH 7/7] drm/amdgpu: retired reset_ras_error_count from hdp callbacks
2021-04-29 6:25 ` [PATCH 7/7] drm/amdgpu: retired reset_ras_error_count from hdp callbacks Hawking Zhang
@ 2021-04-29 8:31 ` Li, Dennis
0 siblings, 0 replies; 15+ messages in thread
From: Li, Dennis @ 2021-04-29 8:31 UTC (permalink / raw)
To: Zhang, Hawking, Deucher, Alexander, Clements, John, amd-gfx
Cc: Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]
This patch looks good to me.
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
-----Original Message-----
From: Hawking Zhang <Hawking.Zhang@amd.com>
Sent: Thursday, April 29, 2021 2:26 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Li, Dennis <Dennis.Li@amd.com>; Clements, John <John.Clements@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: [PATCH 7/7] drm/amdgpu: retired reset_ras_error_count from hdp callbacks
It was moved to hdp ras callbacks
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 1 -
drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 1 -
2 files changed, 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index ba6f272..7ec99d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -35,7 +35,6 @@ struct amdgpu_hdp_funcs {
void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
void (*init_registers)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 330c0f0..74b90cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -159,7 +159,6 @@ const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = { const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
.flush_hdp = hdp_v4_0_flush_hdp,
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
- .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
.update_clock_gating = hdp_v4_0_update_clock_gating,
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
.init_registers = hdp_v4_0_init_registers,
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH 6/7] drm/amdgpu: enable ras error count query and reset for HDP
2021-04-29 6:25 ` [PATCH 6/7] drm/amdgpu: enable ras error count query and reset for HDP Hawking Zhang
@ 2021-04-29 8:32 ` Li, Dennis
0 siblings, 0 replies; 15+ messages in thread
From: Li, Dennis @ 2021-04-29 8:32 UTC (permalink / raw)
To: Zhang, Hawking, Deucher, Alexander, Clements, John, amd-gfx
Cc: Zhang, Hawking
[AMD Official Use Only - Internal Distribution Only]
This patch looks good to me.
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
-----Original Message-----
From: Hawking Zhang <Hawking.Zhang@amd.com>
Sent: Thursday, April 29, 2021 2:26 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Li, Dennis <Dennis.Li@amd.com>; Clements, John <John.Clements@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: [PATCH 6/7] drm/amdgpu: enable ras error count query and reset for HDP
add hdp block ras error query and reset support in amdgpu ras error count query and reset interface
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 ++++++++++
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++++
drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ---
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ae9fb20..984e827 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -890,6 +890,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
adev->gmc.xgmi.ras_funcs->query_ras_error_count)
adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
break;
+ case AMDGPU_RAS_BLOCK__HDP:
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->query_ras_error_count)
+ adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
+ break;
default:
break;
}
@@ -967,6 +972,11 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
if (adev->sdma.funcs->reset_ras_error_count)
adev->sdma.funcs->reset_ras_error_count(adev);
break;
+ case AMDGPU_RAS_BLOCK__HDP:
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->reset_ras_error_count)
+ adev->hdp.ras_funcs->reset_ras_error_count(adev);
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 8e0cab5..3daf806 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1276,6 +1276,10 @@ static int gmc_v9_0_late_init(void *handle)
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->reset_ras_error_count)
+ adev->hdp.ras_funcs->reset_ras_error_count(adev);
+
r = amdgpu_gmc_ras_late_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index d80e12b..28e9f6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1521,9 +1521,6 @@ static int soc15_common_late_init(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
- if (adev->hdp.funcs->reset_ras_error_count)
- adev->hdp.funcs->reset_ras_error_count(adev);
-
if (adev->nbio.ras_funcs &&
adev->nbio.ras_funcs->ras_late_init)
r = adev->nbio.ras_funcs->ras_late_init(adev);
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* RE: [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini
2021-04-29 8:22 ` Li, Dennis
@ 2021-04-29 12:24 ` Zhang, Hawking
0 siblings, 0 replies; 15+ messages in thread
From: Zhang, Hawking @ 2021-04-29 12:24 UTC (permalink / raw)
To: Li, Dennis, Deucher, Alexander, Clements, John, amd-gfx
[AMD Public Use]
Hi Dennis,
The memory allocation for ras_if is needed even the block mask is not set, because kernel need to issue disable_feature command to RAS TA in amdgpu_ras_late_init. e.g. to set GFX EDC mode to bypass mode.
Regards,
Hawking
-----Original Message-----
From: Li, Dennis <Dennis.Li@amd.com>
Sent: Thursday, April 29, 2021 16:23
To: Zhang, Hawking <Hawking.Zhang@amd.com>; Deucher, Alexander <Alexander.Deucher@amd.com>; Clements, John <John.Clements@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: RE: [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini
[AMD Official Use Only - Internal Distribution Only]
>>+ r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
>>+ &fs_info, &ih_info);
>>+ if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
>>+ kfree(adev->hdp.ras_if);
>>+ adev->hdp.ras_if = NULL;
>>+ }
It is better to move amdgpu_ras_is_supported more early, to avoid redundant memory allocation when HDP doesn't support RAS. Except this, it looks good to me.
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
-----Original Message-----
From: Hawking Zhang <Hawking.Zhang@amd.com>
Sent: Thursday, April 29, 2021 2:26 PM
To: Deucher, Alexander <Alexander.Deucher@amd.com>; Li, Dennis <Dennis.Li@amd.com>; Clements, John <John.Clements@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>
Subject: [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini
hdp ras init/fini are common functions that can be shared among hdp generations
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 69 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 2 +
3 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index ee85e8a..418e674 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -56,7 +56,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
- amdgpu_fw_attestation.o amdgpu_securedisplay.o
+ amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
new file mode 100644
index 0000000..1d50d53
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+obtaining a
+ * copy of this software and associated documentation files (the
+"Software"),
+ * to deal in the Software without restriction, including without
+limitation
+ * the rights to use, copy, modify, merge, publish, distribute,
+sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom
+the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
+DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev) {
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "hdp_err_count",
+ };
+
+ if (!adev->hdp.ras_if) {
+ adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->hdp.ras_if)
+ return -ENOMEM;
+ adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
+ adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->hdp.ras_if->sub_block_index = 0;
+ strcpy(adev->hdp.ras_if->name, "hdp");
+ }
+ ih_info.head = fs_info.head = *adev->hdp.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
+ kfree(adev->hdp.ras_if);
+ adev->hdp.ras_if = NULL;
+ }
It is better to move amdgpu_ras_is_supported more early, to avoid redundant memory allocation when HDP doesn't support RAS.
+
+ return r;
+}
+
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev) {
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
+ adev->hdp.ras_if) {
+ struct ras_common_if *ras_if = adev->hdp.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index c89cf8d..ba6f272 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -47,4 +47,6 @@ struct amdgpu_hdp {
const struct amdgpu_hdp_ras_funcs *ras_funcs;
};
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev); void
+amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
#endif /* __AMDGPU_HDP_H__ */
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
end of thread, other threads:[~2021-04-29 12:24 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-29 6:25 [PATCH 1/7] drm/amdgpu: add hdp ras structures Hawking Zhang
2021-04-29 6:25 ` [PATCH 2/7] drm/amdgpu: add helpers for hdp ras init/fini Hawking Zhang
2021-04-29 8:22 ` Li, Dennis
2021-04-29 12:24 ` Zhang, Hawking
2021-04-29 6:25 ` [PATCH 3/7] drm/amdgpu: implement hdp v4_0 ras functions Hawking Zhang
2021-04-29 8:28 ` Li, Dennis
2021-04-29 6:25 ` [PATCH 4/7] drm/amdgpu: initialize " Hawking Zhang
2021-04-29 8:30 ` Li, Dennis
2021-04-29 6:25 ` [PATCH 5/7] drm/amdgpu: init/fini hdp v4_0 ras Hawking Zhang
2021-04-29 8:31 ` Li, Dennis
2021-04-29 6:25 ` [PATCH 6/7] drm/amdgpu: enable ras error count query and reset for HDP Hawking Zhang
2021-04-29 8:32 ` Li, Dennis
2021-04-29 6:25 ` [PATCH 7/7] drm/amdgpu: retired reset_ras_error_count from hdp callbacks Hawking Zhang
2021-04-29 8:31 ` Li, Dennis
2021-04-29 7:50 ` [PATCH 1/7] drm/amdgpu: add hdp ras structures Clements, John
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).