* [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV
@ 2022-12-08 7:51 Tao Zhou
2022-12-08 7:51 ` [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV Tao Zhou
` (5 more replies)
0 siblings, 6 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08 7:51 UTC (permalink / raw)
To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
david.yu
Cc: Tao Zhou
Send message to host and host will handle it.
v2: split it into two parts, one for mxgpu ai and another one for common
poison consumption handler.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 +
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 6 ++++++
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 1 +
3 files changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 2b9d806e23af..b9e9480448af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -88,6 +88,7 @@ struct amdgpu_virt_ops {
int (*wait_reset)(struct amdgpu_device *adev);
void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
u32 data1, u32 data2, u32 data3);
+ void (*ras_poison_handler)(struct amdgpu_device *adev);
};
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 12906ba74462..63725b2ebc03 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -404,6 +404,11 @@ static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
}
+static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev)
+{
+ xgpu_ai_send_access_requests(adev, IDH_RAS_POISON);
+}
+
const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.req_full_gpu = xgpu_ai_request_full_gpu_access,
.rel_full_gpu = xgpu_ai_release_full_gpu_access,
@@ -411,4 +416,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
.wait_reset = NULL,
.trans_msg = xgpu_ai_mailbox_trans_msg,
.req_init_data = xgpu_ai_request_init_data,
+ .ras_poison_handler = xgpu_ai_ras_poison_handler,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index fa7e13e0459e..af1a784696bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -39,6 +39,7 @@ enum idh_request {
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
+ IDH_RAS_POISON = 202,
};
enum idh_event {
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV
2022-12-08 7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
@ 2022-12-08 7:51 ` Tao Zhou
2022-12-08 7:51 ` [PATCH 3/7] drm/amdgpu: add RAS poison consumption handler for SRIOV Tao Zhou
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08 7:51 UTC (permalink / raw)
To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
david.yu
Cc: Tao Zhou
Send handling request to host.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 6 ++++++
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 1 +
2 files changed, 7 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index e07757eea7ad..cae1aaa4ddb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -426,6 +426,11 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
}
+static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev)
+{
+ xgpu_nv_send_access_requests(adev, IDH_RAS_POISON);
+}
+
const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.req_full_gpu = xgpu_nv_request_full_gpu_access,
.rel_full_gpu = xgpu_nv_release_full_gpu_access,
@@ -433,4 +438,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
.reset_gpu = xgpu_nv_request_reset,
.wait_reset = NULL,
.trans_msg = xgpu_nv_mailbox_trans_msg,
+ .ras_poison_handler = xgpu_nv_ras_poison_handler,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 73887b0aa1d6..d0221ce08769 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -39,6 +39,7 @@ enum idh_request {
IDH_LOG_VF_ERROR = 200,
IDH_READY_TO_RESET = 201,
+ IDH_RAS_POISON = 202,
};
enum idh_event {
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/7] drm/amdgpu: add RAS poison consumption handler for SRIOV
2022-12-08 7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
2022-12-08 7:51 ` [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV Tao Zhou
@ 2022-12-08 7:51 ` Tao Zhou
2022-12-08 7:51 ` [PATCH 4/7] drm/amdgpu: add VCN " Tao Zhou
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08 7:51 UTC (permalink / raw)
To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
david.yu
Cc: Tao Zhou
Send message to PF if VF receives RAS poison consumption interrupt.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 44 +++++++++++++++----------
1 file changed, 26 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index f76c19fc0392..1c7fcb4f2380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -169,25 +169,33 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
{
int ret = AMDGPU_RAS_SUCCESS;
- if (!adev->gmc.xgmi.connected_to_cpu) {
- struct ras_err_data err_data = {0, 0, 0, NULL};
- struct ras_common_if head = {
- .block = AMDGPU_RAS_BLOCK__UMC,
- };
- struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
-
- ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
-
- if (ret == AMDGPU_RAS_SUCCESS && obj) {
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
+ if (!amdgpu_sriov_vf(adev)) {
+ if (!adev->gmc.xgmi.connected_to_cpu) {
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct ras_common_if head = {
+ .block = AMDGPU_RAS_BLOCK__UMC,
+ };
+ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+
+ ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+
+ if (ret == AMDGPU_RAS_SUCCESS && obj) {
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ }
+ } else if (reset) {
+ /* MCA poison handler is only responsible for GPU reset,
+ * let MCA notifier do page retirement.
+ */
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+ amdgpu_ras_reset_gpu(adev);
}
- } else if (reset) {
- /* MCA poison handler is only responsible for GPU reset,
- * let MCA notifier do page retirement.
- */
- kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- amdgpu_ras_reset_gpu(adev);
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV!\n");
}
return ret;
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 4/7] drm/amdgpu: add VCN poison consumption handler for SRIOV
2022-12-08 7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
2022-12-08 7:51 ` [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV Tao Zhou
2022-12-08 7:51 ` [PATCH 3/7] drm/amdgpu: add RAS poison consumption handler for SRIOV Tao Zhou
@ 2022-12-08 7:51 ` Tao Zhou
2022-12-08 7:51 ` [PATCH 5/7] drm/amdgpu: skip RAS error injection in SRIOV Tao Zhou
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08 7:51 UTC (permalink / raw)
To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
david.yu
Cc: Tao Zhou
Inform host and let host handle consumption interrupt.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 72fa14ff862f..a23e26b272b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1246,8 +1246,16 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
if (!ras_if)
return 0;
- ih_data.head = *ras_if;
- amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ if (!amdgpu_sriov_vf(adev)) {
+ ih_data.head = *ras_if;
+ amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+ } else {
+ if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+ adev->virt.ops->ras_poison_handler(adev);
+ else
+ dev_warn(adev->dev,
+ "No ras_poison_handler interface in SRIOV for VCN!\n");
+ }
return 0;
}
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 5/7] drm/amdgpu: skip RAS error injection in SRIOV
2022-12-08 7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
` (2 preceding siblings ...)
2022-12-08 7:51 ` [PATCH 4/7] drm/amdgpu: add VCN " Tao Zhou
@ 2022-12-08 7:51 ` Tao Zhou
2022-12-08 7:51 ` [PATCH 6/7] drm/amdgpu: update VCN/JPEG RAS setting Tao Zhou
2022-12-08 7:51 ` [PATCH 7/7] drm/amdgpu: define RAS poison mode query function Tao Zhou
5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08 7:51 UTC (permalink / raw)
To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
david.yu
Cc: Tao Zhou
Injection on guest is not allowed.
v2: return directly in SRIOV environment.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ad490c1e2f57..4e450e0b14fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1087,6 +1087,10 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
info->head.block,
info->head.sub_block_index);
+ /* inject on guest isn't allowed, return success directly */
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
if (!obj)
return -EINVAL;
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 6/7] drm/amdgpu: update VCN/JPEG RAS setting
2022-12-08 7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
` (3 preceding siblings ...)
2022-12-08 7:51 ` [PATCH 5/7] drm/amdgpu: skip RAS error injection in SRIOV Tao Zhou
@ 2022-12-08 7:51 ` Tao Zhou
2022-12-08 7:51 ` [PATCH 7/7] drm/amdgpu: define RAS poison mode query function Tao Zhou
5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08 7:51 UTC (permalink / raw)
To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
david.yu
Cc: Tao Zhou
Support VCN/JPEG RAS in both bare metal and SRIOV environment.
v2: update commit description.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 4e450e0b14fa..56d2c581f545 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2348,22 +2348,24 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
dev_info(adev->dev, "SRAM ECC is active.\n");
- if (!amdgpu_sriov_vf(adev)) {
+ if (!amdgpu_sriov_vf(adev))
adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
-
- if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) ||
- adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))
- adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
- 1 << AMDGPU_RAS_BLOCK__JPEG);
- else
- adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
- 1 << AMDGPU_RAS_BLOCK__JPEG);
- } else {
+ else
adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
1 << AMDGPU_RAS_BLOCK__SDMA |
1 << AMDGPU_RAS_BLOCK__GFX);
- }
+
+ /* VCN/JPEG RAS can be supported on both bare metal and
+ * SRIOV environment
+ */
+ if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) ||
+ adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+ else
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
} else {
dev_info(adev->dev, "SRAM ECC is not presented.\n");
}
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 7/7] drm/amdgpu: define RAS poison mode query function
2022-12-08 7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
` (4 preceding siblings ...)
2022-12-08 7:51 ` [PATCH 6/7] drm/amdgpu: update VCN/JPEG RAS setting Tao Zhou
@ 2022-12-08 7:51 ` Tao Zhou
2022-12-08 8:30 ` Zhang, Hawking
5 siblings, 1 reply; 8+ messages in thread
From: Tao Zhou @ 2022-12-08 7:51 UTC (permalink / raw)
To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
david.yu
Cc: Tao Zhou
1. no need to query poison mode on SRIOV guest side, host can handle it.
2. define the function to simplify code.
v2: rename amdgpu_ras_poison_mode_query to amdgpu_ras_query_poison_mode.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 54 +++++++++++++++----------
1 file changed, 33 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 56d2c581f545..0735dfd72c99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2411,11 +2411,42 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
pm_runtime_put_autosuspend(dev->dev);
}
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ bool df_poison, umc_poison;
+
+ /* poison setting is useless on SRIOV guest */
+ if (amdgpu_sriov_vf(adev) || !con)
+ return;
+
+ /* Init poison supported flag, the default value is false */
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ /* enabled by default when GPU is connected to CPU */
+ con->poison_supported = true;
+ } else if (adev->df.funcs &&
+ adev->df.funcs->query_ras_poison_mode &&
+ adev->umc.ras &&
+ adev->umc.ras->query_ras_poison_mode) {
+ df_poison =
+ adev->df.funcs->query_ras_poison_mode(adev);
+ umc_poison =
+ adev->umc.ras->query_ras_poison_mode(adev);
+
+ /* Only poison is set in both DF and UMC, we can support it */
+ if (df_poison && umc_poison)
+ con->poison_supported = true;
+ else if (df_poison != umc_poison)
+ dev_warn(adev->dev,
+ "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+ df_poison, umc_poison);
+ }
+}
+
int amdgpu_ras_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int r;
- bool df_poison, umc_poison;
if (con)
return 0;
@@ -2490,26 +2521,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
goto release_con;
}
- /* Init poison supported flag, the default value is false */
- if (adev->gmc.xgmi.connected_to_cpu) {
- /* enabled by default when GPU is connected to CPU */
- con->poison_supported = true;
- }
- else if (adev->df.funcs &&
- adev->df.funcs->query_ras_poison_mode &&
- adev->umc.ras &&
- adev->umc.ras->query_ras_poison_mode) {
- df_poison =
- adev->df.funcs->query_ras_poison_mode(adev);
- umc_poison =
- adev->umc.ras->query_ras_poison_mode(adev);
- /* Only poison is set in both DF and UMC, we can support it */
- if (df_poison && umc_poison)
- con->poison_supported = true;
- else if (df_poison != umc_poison)
- dev_warn(adev->dev, "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
- df_poison, umc_poison);
- }
+ amdgpu_ras_query_poison_mode(adev);
if (amdgpu_ras_fs_init(adev)) {
r = -EINVAL;
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* RE: [PATCH 7/7] drm/amdgpu: define RAS poison mode query function
2022-12-08 7:51 ` [PATCH 7/7] drm/amdgpu: define RAS poison mode query function Tao Zhou
@ 2022-12-08 8:30 ` Zhang, Hawking
0 siblings, 0 replies; 8+ messages in thread
From: Zhang, Hawking @ 2022-12-08 8:30 UTC (permalink / raw)
To: Zhou1, Tao, amd-gfx, Yang, Stanley, Wan, Gavin, Chander, Vignesh,
Yu, David
Cc: Zhou1, Tao
[AMD Official Use Only - General]
Series is
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Regards,
Hawking
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Tao Zhou
Sent: Thursday, December 8, 2022 15:51
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking <Hawking.Zhang@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>; Wan, Gavin <Gavin.Wan@amd.com>; Chander, Vignesh <Vignesh.Chander@amd.com>; Yu, David <David.Yu@amd.com>
Cc: Zhou1, Tao <Tao.Zhou1@amd.com>
Subject: [PATCH 7/7] drm/amdgpu: define RAS poison mode query function
1. no need to query poison mode on SRIOV guest side, host can handle it.
2. define the function to simplify code.
v2: rename amdgpu_ras_poison_mode_query to amdgpu_ras_query_poison_mode.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 54 +++++++++++++++----------
1 file changed, 33 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 56d2c581f545..0735dfd72c99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2411,11 +2411,42 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
pm_runtime_put_autosuspend(dev->dev);
}
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ bool df_poison, umc_poison;
+
+ /* poison setting is useless on SRIOV guest */
+ if (amdgpu_sriov_vf(adev) || !con)
+ return;
+
+ /* Init poison supported flag, the default value is false */
+ if (adev->gmc.xgmi.connected_to_cpu) {
+ /* enabled by default when GPU is connected to CPU */
+ con->poison_supported = true;
+ } else if (adev->df.funcs &&
+ adev->df.funcs->query_ras_poison_mode &&
+ adev->umc.ras &&
+ adev->umc.ras->query_ras_poison_mode) {
+ df_poison =
+ adev->df.funcs->query_ras_poison_mode(adev);
+ umc_poison =
+ adev->umc.ras->query_ras_poison_mode(adev);
+
+ /* Only poison is set in both DF and UMC, we can support it */
+ if (df_poison && umc_poison)
+ con->poison_supported = true;
+ else if (df_poison != umc_poison)
+ dev_warn(adev->dev,
+ "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+ df_poison, umc_poison);
+ }
+}
+
int amdgpu_ras_init(struct amdgpu_device *adev) {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int r;
- bool df_poison, umc_poison;
if (con)
return 0;
@@ -2490,26 +2521,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
goto release_con;
}
- /* Init poison supported flag, the default value is false */
- if (adev->gmc.xgmi.connected_to_cpu) {
- /* enabled by default when GPU is connected to CPU */
- con->poison_supported = true;
- }
- else if (adev->df.funcs &&
- adev->df.funcs->query_ras_poison_mode &&
- adev->umc.ras &&
- adev->umc.ras->query_ras_poison_mode) {
- df_poison =
- adev->df.funcs->query_ras_poison_mode(adev);
- umc_poison =
- adev->umc.ras->query_ras_poison_mode(adev);
- /* Only poison is set in both DF and UMC, we can support it */
- if (df_poison && umc_poison)
- con->poison_supported = true;
- else if (df_poison != umc_poison)
- dev_warn(adev->dev, "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
- df_poison, umc_poison);
- }
+ amdgpu_ras_query_poison_mode(adev);
if (amdgpu_ras_fs_init(adev)) {
r = -EINVAL;
--
2.35.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2022-12-08 8:30 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-08 7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
2022-12-08 7:51 ` [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV Tao Zhou
2022-12-08 7:51 ` [PATCH 3/7] drm/amdgpu: add RAS poison consumption handler for SRIOV Tao Zhou
2022-12-08 7:51 ` [PATCH 4/7] drm/amdgpu: add VCN " Tao Zhou
2022-12-08 7:51 ` [PATCH 5/7] drm/amdgpu: skip RAS error injection in SRIOV Tao Zhou
2022-12-08 7:51 ` [PATCH 6/7] drm/amdgpu: update VCN/JPEG RAS setting Tao Zhou
2022-12-08 7:51 ` [PATCH 7/7] drm/amdgpu: define RAS poison mode query function Tao Zhou
2022-12-08 8:30 ` Zhang, Hawking
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.