All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV
@ 2022-12-08  7:51 Tao Zhou
  2022-12-08  7:51 ` [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV Tao Zhou
                   ` (5 more replies)
  0 siblings, 6 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08  7:51 UTC (permalink / raw)
  To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
	david.yu
  Cc: Tao Zhou

Send message to host and host will handle it.

v2: split it into two parts, one for mxgpu ai and another one for common
poison consumption handler.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 +
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c    | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h    | 1 +
 3 files changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 2b9d806e23af..b9e9480448af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -88,6 +88,7 @@ struct amdgpu_virt_ops {
 	int (*wait_reset)(struct amdgpu_device *adev);
 	void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
 			  u32 data1, u32 data2, u32 data3);
+	void (*ras_poison_handler)(struct amdgpu_device *adev);
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 12906ba74462..63725b2ebc03 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -404,6 +404,11 @@ static int xgpu_ai_request_init_data(struct amdgpu_device *adev)
 	return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA);
 }
 
+static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev)
+{
+	xgpu_ai_send_access_requests(adev, IDH_RAS_POISON);
+}
+
 const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
 	.req_full_gpu	= xgpu_ai_request_full_gpu_access,
 	.rel_full_gpu	= xgpu_ai_release_full_gpu_access,
@@ -411,4 +416,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
 	.wait_reset = NULL,
 	.trans_msg = xgpu_ai_mailbox_trans_msg,
 	.req_init_data  = xgpu_ai_request_init_data,
+	.ras_poison_handler = xgpu_ai_ras_poison_handler,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index fa7e13e0459e..af1a784696bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -39,6 +39,7 @@ enum idh_request {
 
 	IDH_LOG_VF_ERROR       = 200,
 	IDH_READY_TO_RESET 	= 201,
+	IDH_RAS_POISON  = 202,
 };
 
 enum idh_event {
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV
  2022-12-08  7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
@ 2022-12-08  7:51 ` Tao Zhou
  2022-12-08  7:51 ` [PATCH 3/7] drm/amdgpu: add RAS poison consumption handler for SRIOV Tao Zhou
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08  7:51 UTC (permalink / raw)
  To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
	david.yu
  Cc: Tao Zhou

Send handling request to host.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index e07757eea7ad..cae1aaa4ddb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -426,6 +426,11 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev)
 	amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
 }
 
+static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev)
+{
+	xgpu_nv_send_access_requests(adev, IDH_RAS_POISON);
+}
+
 const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
 	.req_full_gpu	= xgpu_nv_request_full_gpu_access,
 	.rel_full_gpu	= xgpu_nv_release_full_gpu_access,
@@ -433,4 +438,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
 	.reset_gpu = xgpu_nv_request_reset,
 	.wait_reset = NULL,
 	.trans_msg = xgpu_nv_mailbox_trans_msg,
+	.ras_poison_handler = xgpu_nv_ras_poison_handler,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
index 73887b0aa1d6..d0221ce08769 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h
@@ -39,6 +39,7 @@ enum idh_request {
 
 	IDH_LOG_VF_ERROR	= 200,
 	IDH_READY_TO_RESET 	= 201,
+	IDH_RAS_POISON	= 202,
 };
 
 enum idh_event {
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 3/7] drm/amdgpu: add RAS poison consumption handler for SRIOV
  2022-12-08  7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
  2022-12-08  7:51 ` [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV Tao Zhou
@ 2022-12-08  7:51 ` Tao Zhou
  2022-12-08  7:51 ` [PATCH 4/7] drm/amdgpu: add VCN " Tao Zhou
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08  7:51 UTC (permalink / raw)
  To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
	david.yu
  Cc: Tao Zhou

Send message to PF if VF receives RAS poison consumption interrupt.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 44 +++++++++++++++----------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index f76c19fc0392..1c7fcb4f2380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -169,25 +169,33 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
 {
 	int ret = AMDGPU_RAS_SUCCESS;
 
-	if (!adev->gmc.xgmi.connected_to_cpu) {
-		struct ras_err_data err_data = {0, 0, 0, NULL};
-		struct ras_common_if head = {
-			.block = AMDGPU_RAS_BLOCK__UMC,
-		};
-		struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
-
-		ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
-
-		if (ret == AMDGPU_RAS_SUCCESS && obj) {
-			obj->err_data.ue_count += err_data.ue_count;
-			obj->err_data.ce_count += err_data.ce_count;
+	if (!amdgpu_sriov_vf(adev)) {
+		if (!adev->gmc.xgmi.connected_to_cpu) {
+			struct ras_err_data err_data = {0, 0, 0, NULL};
+			struct ras_common_if head = {
+				.block = AMDGPU_RAS_BLOCK__UMC,
+			};
+			struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+
+			ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+
+			if (ret == AMDGPU_RAS_SUCCESS && obj) {
+				obj->err_data.ue_count += err_data.ue_count;
+				obj->err_data.ce_count += err_data.ce_count;
+			}
+		} else if (reset) {
+			/* MCA poison handler is only responsible for GPU reset,
+			 * let MCA notifier do page retirement.
+			 */
+			kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+			amdgpu_ras_reset_gpu(adev);
 		}
-	} else if (reset) {
-		/* MCA poison handler is only responsible for GPU reset,
-		 * let MCA notifier do page retirement.
-		 */
-		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
-		amdgpu_ras_reset_gpu(adev);
+	} else {
+		if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+			adev->virt.ops->ras_poison_handler(adev);
+		else
+			dev_warn(adev->dev,
+				"No ras_poison_handler interface in SRIOV!\n");
 	}
 
 	return ret;
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 4/7] drm/amdgpu: add VCN poison consumption handler for SRIOV
  2022-12-08  7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
  2022-12-08  7:51 ` [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV Tao Zhou
  2022-12-08  7:51 ` [PATCH 3/7] drm/amdgpu: add RAS poison consumption handler for SRIOV Tao Zhou
@ 2022-12-08  7:51 ` Tao Zhou
  2022-12-08  7:51 ` [PATCH 5/7] drm/amdgpu: skip RAS error injection in SRIOV Tao Zhou
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08  7:51 UTC (permalink / raw)
  To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
	david.yu
  Cc: Tao Zhou

Inform host and let host handle consumption interrupt.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 72fa14ff862f..a23e26b272b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -1246,8 +1246,16 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
 	if (!ras_if)
 		return 0;
 
-	ih_data.head = *ras_if;
-	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+	if (!amdgpu_sriov_vf(adev)) {
+		ih_data.head = *ras_if;
+		amdgpu_ras_interrupt_dispatch(adev, &ih_data);
+	} else {
+		if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
+			adev->virt.ops->ras_poison_handler(adev);
+		else
+			dev_warn(adev->dev,
+				"No ras_poison_handler interface in SRIOV for VCN!\n");
+	}
 
 	return 0;
 }
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 5/7] drm/amdgpu: skip RAS error injection in SRIOV
  2022-12-08  7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
                   ` (2 preceding siblings ...)
  2022-12-08  7:51 ` [PATCH 4/7] drm/amdgpu: add VCN " Tao Zhou
@ 2022-12-08  7:51 ` Tao Zhou
  2022-12-08  7:51 ` [PATCH 6/7] drm/amdgpu: update VCN/JPEG RAS setting Tao Zhou
  2022-12-08  7:51 ` [PATCH 7/7] drm/amdgpu: define RAS poison mode query function Tao Zhou
  5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08  7:51 UTC (permalink / raw)
  To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
	david.yu
  Cc: Tao Zhou

Injection on guest is not allowed.

v2: return directly in SRIOV environment.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index ad490c1e2f57..4e450e0b14fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1087,6 +1087,10 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
 							info->head.block,
 							info->head.sub_block_index);
 
+	/* inject on guest isn't allowed, return success directly */
+	if (amdgpu_sriov_vf(adev))
+		return 0;
+
 	if (!obj)
 		return -EINVAL;
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 6/7] drm/amdgpu: update VCN/JPEG RAS setting
  2022-12-08  7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
                   ` (3 preceding siblings ...)
  2022-12-08  7:51 ` [PATCH 5/7] drm/amdgpu: skip RAS error injection in SRIOV Tao Zhou
@ 2022-12-08  7:51 ` Tao Zhou
  2022-12-08  7:51 ` [PATCH 7/7] drm/amdgpu: define RAS poison mode query function Tao Zhou
  5 siblings, 0 replies; 8+ messages in thread
From: Tao Zhou @ 2022-12-08  7:51 UTC (permalink / raw)
  To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
	david.yu
  Cc: Tao Zhou

Support VCN/JPEG RAS in both bare metal and SRIOV environment.

v2: update commit description.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 4e450e0b14fa..56d2c581f545 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2348,22 +2348,24 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 
 		if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
 			dev_info(adev->dev, "SRAM ECC is active.\n");
-			if (!amdgpu_sriov_vf(adev)) {
+			if (!amdgpu_sriov_vf(adev))
 				adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
 							    1 << AMDGPU_RAS_BLOCK__DF);
-
-				if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) ||
-				    adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))
-					adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
-							1 << AMDGPU_RAS_BLOCK__JPEG);
-				else
-					adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
-							1 << AMDGPU_RAS_BLOCK__JPEG);
-			} else {
+			else
 				adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF |
 								1 << AMDGPU_RAS_BLOCK__SDMA |
 								1 << AMDGPU_RAS_BLOCK__GFX);
-			}
+
+			/* VCN/JPEG RAS can be supported on both bare metal and
+			 * SRIOV environment
+			 */
+			if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) ||
+			    adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))
+				adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
+							1 << AMDGPU_RAS_BLOCK__JPEG);
+			else
+				adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
+							1 << AMDGPU_RAS_BLOCK__JPEG);
 		} else {
 			dev_info(adev->dev, "SRAM ECC is not presented.\n");
 		}
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 7/7] drm/amdgpu: define RAS poison mode query function
  2022-12-08  7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
                   ` (4 preceding siblings ...)
  2022-12-08  7:51 ` [PATCH 6/7] drm/amdgpu: update VCN/JPEG RAS setting Tao Zhou
@ 2022-12-08  7:51 ` Tao Zhou
  2022-12-08  8:30   ` Zhang, Hawking
  5 siblings, 1 reply; 8+ messages in thread
From: Tao Zhou @ 2022-12-08  7:51 UTC (permalink / raw)
  To: amd-gfx, hawking.zhang, stanley.yang, Gavin.Wan, Vignesh.Chander,
	david.yu
  Cc: Tao Zhou

1. no need to query poison mode on SRIOV guest side, host can handle it.
2. define the function to simplify code.

v2: rename amdgpu_ras_poison_mode_query to amdgpu_ras_query_poison_mode.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 54 +++++++++++++++----------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 56d2c581f545..0735dfd72c99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2411,11 +2411,42 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
 	pm_runtime_put_autosuspend(dev->dev);
 }
 
+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev)
+{
+	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+	bool df_poison, umc_poison;
+
+	/* poison setting is useless on SRIOV guest */
+	if (amdgpu_sriov_vf(adev) || !con)
+		return;
+
+	/* Init poison supported flag, the default value is false */
+	if (adev->gmc.xgmi.connected_to_cpu) {
+		/* enabled by default when GPU is connected to CPU */
+		con->poison_supported = true;
+	} else if (adev->df.funcs &&
+	    adev->df.funcs->query_ras_poison_mode &&
+	    adev->umc.ras &&
+	    adev->umc.ras->query_ras_poison_mode) {
+		df_poison =
+			adev->df.funcs->query_ras_poison_mode(adev);
+		umc_poison =
+			adev->umc.ras->query_ras_poison_mode(adev);
+
+		/* Only poison is set in both DF and UMC, we can support it */
+		if (df_poison && umc_poison)
+			con->poison_supported = true;
+		else if (df_poison != umc_poison)
+			dev_warn(adev->dev,
+				"Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+				df_poison, umc_poison);
+	}
+}
+
 int amdgpu_ras_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
 	int r;
-	bool df_poison, umc_poison;
 
 	if (con)
 		return 0;
@@ -2490,26 +2521,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 			goto release_con;
 	}
 
-	/* Init poison supported flag, the default value is false */
-	if (adev->gmc.xgmi.connected_to_cpu) {
-		/* enabled by default when GPU is connected to CPU */
-		con->poison_supported = true;
-	}
-	else if (adev->df.funcs &&
-	    adev->df.funcs->query_ras_poison_mode &&
-	    adev->umc.ras &&
-	    adev->umc.ras->query_ras_poison_mode) {
-		df_poison =
-			adev->df.funcs->query_ras_poison_mode(adev);
-		umc_poison =
-			adev->umc.ras->query_ras_poison_mode(adev);
-		/* Only poison is set in both DF and UMC, we can support it */
-		if (df_poison && umc_poison)
-			con->poison_supported = true;
-		else if (df_poison != umc_poison)
-			dev_warn(adev->dev, "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
-					df_poison, umc_poison);
-	}
+	amdgpu_ras_query_poison_mode(adev);
 
 	if (amdgpu_ras_fs_init(adev)) {
 		r = -EINVAL;
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* RE: [PATCH 7/7] drm/amdgpu: define RAS poison mode query function
  2022-12-08  7:51 ` [PATCH 7/7] drm/amdgpu: define RAS poison mode query function Tao Zhou
@ 2022-12-08  8:30   ` Zhang, Hawking
  0 siblings, 0 replies; 8+ messages in thread
From: Zhang, Hawking @ 2022-12-08  8:30 UTC (permalink / raw)
  To: Zhou1, Tao, amd-gfx, Yang, Stanley, Wan, Gavin, Chander, Vignesh,
	Yu, David
  Cc: Zhou1, Tao

[AMD Official Use Only - General]

Series is

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>

Regards,
Hawking
-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Tao Zhou
Sent: Thursday, December 8, 2022 15:51
To: amd-gfx@lists.freedesktop.org; Zhang, Hawking <Hawking.Zhang@amd.com>; Yang, Stanley <Stanley.Yang@amd.com>; Wan, Gavin <Gavin.Wan@amd.com>; Chander, Vignesh <Vignesh.Chander@amd.com>; Yu, David <David.Yu@amd.com>
Cc: Zhou1, Tao <Tao.Zhou1@amd.com>
Subject: [PATCH 7/7] drm/amdgpu: define RAS poison mode query function

1. no need to query poison mode on SRIOV guest side, host can handle it.
2. define the function to simplify code.

v2: rename amdgpu_ras_poison_mode_query to amdgpu_ras_query_poison_mode.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 54 +++++++++++++++----------
 1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 56d2c581f545..0735dfd72c99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2411,11 +2411,42 @@ static void amdgpu_ras_counte_dw(struct work_struct *work)
        pm_runtime_put_autosuspend(dev->dev);
 }

+static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) {
+       struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+       bool df_poison, umc_poison;
+
+       /* poison setting is useless on SRIOV guest */
+       if (amdgpu_sriov_vf(adev) || !con)
+               return;
+
+       /* Init poison supported flag, the default value is false */
+       if (adev->gmc.xgmi.connected_to_cpu) {
+               /* enabled by default when GPU is connected to CPU */
+               con->poison_supported = true;
+       } else if (adev->df.funcs &&
+           adev->df.funcs->query_ras_poison_mode &&
+           adev->umc.ras &&
+           adev->umc.ras->query_ras_poison_mode) {
+               df_poison =
+                       adev->df.funcs->query_ras_poison_mode(adev);
+               umc_poison =
+                       adev->umc.ras->query_ras_poison_mode(adev);
+
+               /* Only poison is set in both DF and UMC, we can support it */
+               if (df_poison && umc_poison)
+                       con->poison_supported = true;
+               else if (df_poison != umc_poison)
+                       dev_warn(adev->dev,
+                               "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
+                               df_poison, umc_poison);
+       }
+}
+
 int amdgpu_ras_init(struct amdgpu_device *adev)  {
        struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
        int r;
-       bool df_poison, umc_poison;

        if (con)
                return 0;
@@ -2490,26 +2521,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
                        goto release_con;
        }

-       /* Init poison supported flag, the default value is false */
-       if (adev->gmc.xgmi.connected_to_cpu) {
-               /* enabled by default when GPU is connected to CPU */
-               con->poison_supported = true;
-       }
-       else if (adev->df.funcs &&
-           adev->df.funcs->query_ras_poison_mode &&
-           adev->umc.ras &&
-           adev->umc.ras->query_ras_poison_mode) {
-               df_poison =
-                       adev->df.funcs->query_ras_poison_mode(adev);
-               umc_poison =
-                       adev->umc.ras->query_ras_poison_mode(adev);
-               /* Only poison is set in both DF and UMC, we can support it */
-               if (df_poison && umc_poison)
-                       con->poison_supported = true;
-               else if (df_poison != umc_poison)
-                       dev_warn(adev->dev, "Poison setting is inconsistent in DF/UMC(%d:%d)!\n",
-                                       df_poison, umc_poison);
-       }
+       amdgpu_ras_query_poison_mode(adev);

        if (amdgpu_ras_fs_init(adev)) {
                r = -EINVAL;
--
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-12-08  8:30 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-08  7:51 [PATCH 1/7] drm/amdgpu: add RAS poison consumption handler for AI SRIOV Tao Zhou
2022-12-08  7:51 ` [PATCH 2/7] drm/amdgpu: add RAS poison consumption handler for NV SRIOV Tao Zhou
2022-12-08  7:51 ` [PATCH 3/7] drm/amdgpu: add RAS poison consumption handler for SRIOV Tao Zhou
2022-12-08  7:51 ` [PATCH 4/7] drm/amdgpu: add VCN " Tao Zhou
2022-12-08  7:51 ` [PATCH 5/7] drm/amdgpu: skip RAS error injection in SRIOV Tao Zhou
2022-12-08  7:51 ` [PATCH 6/7] drm/amdgpu: update VCN/JPEG RAS setting Tao Zhou
2022-12-08  7:51 ` [PATCH 7/7] drm/amdgpu: define RAS poison mode query function Tao Zhou
2022-12-08  8:30   ` Zhang, Hawking

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.