All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdgpu: enable RAS IH for poison consumption
@ 2022-05-10  6:29 Tao Zhou
  2022-05-10  6:29 ` [PATCH 2/2] drm/amdgpu: refine RAS poison consumption handler Tao Zhou
  2022-05-10  6:36 ` [PATCH 1/2] drm/amdgpu: enable RAS IH for poison consumption Ziya, Mohammad zafar
  0 siblings, 2 replies; 4+ messages in thread
From: Tao Zhou @ 2022-05-10  6:29 UTC (permalink / raw)
  To: amd-gfx, Mohammadzafar.Ziya, Lijo.Lazar, hawking.zhang,
	stanley.yang, YiPeng.Chai
  Cc: Tao Zhou

Enable RAS IH if poison consumption handler is implemented.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index cac56f830aed..91d9e9969b4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2516,7 +2516,9 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev,
 		return 0;
 
 	ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm);
-	if (ras_obj->ras_cb) {
+	if (ras_obj->ras_cb || (ras_obj->hw_ops &&
+	    (ras_obj->hw_ops->query_poison_status ||
+	    ras_obj->hw_ops->handle_poison_consumption))) {
 		r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
 		if (r)
 			goto cleanup;
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/2] drm/amdgpu: refine RAS poison consumption handler
  2022-05-10  6:29 [PATCH 1/2] drm/amdgpu: enable RAS IH for poison consumption Tao Zhou
@ 2022-05-10  6:29 ` Tao Zhou
  2022-05-10 10:14   ` Lazar, Lijo
  2022-05-10  6:36 ` [PATCH 1/2] drm/amdgpu: enable RAS IH for poison consumption Ziya, Mohammad zafar
  1 sibling, 1 reply; 4+ messages in thread
From: Tao Zhou @ 2022-05-10  6:29 UTC (permalink / raw)
  To: amd-gfx, Mohammadzafar.Ziya, Lijo.Lazar, hawking.zhang,
	stanley.yang, YiPeng.Chai
  Cc: Tao Zhou

Query RAS status before RAS poison consumption handling, and add more
comments and logging.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 43 +++++++++++++++----------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 91d9e9969b4e..a653cf3b3d13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1538,33 +1538,42 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
 static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
 				struct amdgpu_iv_entry *entry)
 {
-	bool poison_stat = true, need_reset = true;
+	bool poison_stat = false;
 	struct amdgpu_device *adev = obj->adev;
 	struct ras_err_data err_data = {0, 0, 0, NULL};
 	struct amdgpu_ras_block_object *block_obj =
 		amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
 
-	if (!adev->gmc.xgmi.connected_to_cpu)
-		amdgpu_umc_poison_handler(adev, &err_data, false);
-
-	/* both query_poison_status and handle_poison_consumption are optional */
-	if (block_obj && block_obj->hw_ops) {
-		if (block_obj->hw_ops->query_poison_status) {
-			poison_stat = block_obj->hw_ops->query_poison_status(adev);
-			if (!poison_stat)
-				dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
-						block_obj->ras_comm.name);
-		}
+	if (!block_obj || !block_obj->hw_ops)
+		return;
 
-		if (poison_stat && block_obj->hw_ops->handle_poison_consumption) {
-			poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
-			need_reset = poison_stat;
+	/* both query_poison_status and handle_poison_consumption are optional,
+	 * but at least one of them should be implemented if we need poison
+	 * consumption handler
+	 */
+	if (block_obj->hw_ops->query_poison_status) {
+		poison_stat = block_obj->hw_ops->query_poison_status(adev);
+		if (!poison_stat) {
+			/* Not poison consumption interrupt, no need to handle it */
+			dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
+					block_obj->ras_comm.name);
+
+			return;
 		}
 	}
 
-	/* gpu reset is fallback for all failed cases */
-	if (need_reset)
+	if (!adev->gmc.xgmi.connected_to_cpu)
+		amdgpu_umc_poison_handler(adev, &err_data, false);
+
+	if (block_obj->hw_ops->handle_poison_consumption)
+		poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
+
+	/* gpu reset is fallback for failed and default cases */
+	if (poison_stat) {
+		dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
+				block_obj->ras_comm.name);
 		amdgpu_ras_reset_gpu(adev);
+	}
 }
 
 static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* RE: [PATCH 1/2] drm/amdgpu: enable RAS IH for poison consumption
  2022-05-10  6:29 [PATCH 1/2] drm/amdgpu: enable RAS IH for poison consumption Tao Zhou
  2022-05-10  6:29 ` [PATCH 2/2] drm/amdgpu: refine RAS poison consumption handler Tao Zhou
@ 2022-05-10  6:36 ` Ziya, Mohammad zafar
  1 sibling, 0 replies; 4+ messages in thread
From: Ziya, Mohammad zafar @ 2022-05-10  6:36 UTC (permalink / raw)
  To: Zhou1, Tao, amd-gfx, Lazar, Lijo, Zhang, Hawking, Yang, Stanley,
	Chai, Thomas
  Cc: Zhou1, Tao

[AMD Official Use Only - General]


Reviewed-by: Mohammad Zafar Ziya <Mohammadzafar.ziya@amd.com>

>-----Original Message-----
>From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Tao
>Zhou
>Sent: Tuesday, May 10, 2022 12:00 PM
>To: amd-gfx@lists.freedesktop.org; Ziya, Mohammad zafar
><Mohammadzafar.Ziya@amd.com>; Lazar, Lijo <Lijo.Lazar@amd.com>;
>Zhang, Hawking <Hawking.Zhang@amd.com>; Yang, Stanley
><Stanley.Yang@amd.com>; Chai, Thomas <YiPeng.Chai@amd.com>
>Cc: Zhou1, Tao <Tao.Zhou1@amd.com>
>Subject: [PATCH 1/2] drm/amdgpu: enable RAS IH for poison consumption
>
>Enable RAS IH if poison consumption handler is implemented.
>
>Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>index cac56f830aed..91d9e9969b4e 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
>@@ -2516,7 +2516,9 @@ int amdgpu_ras_block_late_init(struct
>amdgpu_device *adev,
> 		return 0;
>
> 	ras_obj = container_of(ras_block, struct amdgpu_ras_block_object,
>ras_comm);
>-	if (ras_obj->ras_cb) {
>+	if (ras_obj->ras_cb || (ras_obj->hw_ops &&
>+	    (ras_obj->hw_ops->query_poison_status ||
>+	    ras_obj->hw_ops->handle_poison_consumption))) {
> 		r = amdgpu_ras_interrupt_add_handler(adev, ras_block);
> 		if (r)
> 			goto cleanup;
>--
>2.35.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: refine RAS poison consumption handler
  2022-05-10  6:29 ` [PATCH 2/2] drm/amdgpu: refine RAS poison consumption handler Tao Zhou
@ 2022-05-10 10:14   ` Lazar, Lijo
  0 siblings, 0 replies; 4+ messages in thread
From: Lazar, Lijo @ 2022-05-10 10:14 UTC (permalink / raw)
  To: Tao Zhou, amd-gfx, Mohammadzafar.Ziya, hawking.zhang,
	stanley.yang, YiPeng.Chai



On 5/10/2022 11:59 AM, Tao Zhou wrote:
> Query RAS status before RAS poison consumption handling, and add more
> comments and logging.
> 
> Signed-off-by: Tao Zhou <tao.zhou1@amd.com>

Series is :
	Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>

Thanks,
Lijo

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 43 +++++++++++++++----------
>   1 file changed, 26 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 91d9e9969b4e..a653cf3b3d13 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -1538,33 +1538,42 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
>   static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
>   				struct amdgpu_iv_entry *entry)
>   {
> -	bool poison_stat = true, need_reset = true;
> +	bool poison_stat = false;
>   	struct amdgpu_device *adev = obj->adev;
>   	struct ras_err_data err_data = {0, 0, 0, NULL};
>   	struct amdgpu_ras_block_object *block_obj =
>   		amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
>   
> -	if (!adev->gmc.xgmi.connected_to_cpu)
> -		amdgpu_umc_poison_handler(adev, &err_data, false);
> -
> -	/* both query_poison_status and handle_poison_consumption are optional */
> -	if (block_obj && block_obj->hw_ops) {
> -		if (block_obj->hw_ops->query_poison_status) {
> -			poison_stat = block_obj->hw_ops->query_poison_status(adev);
> -			if (!poison_stat)
> -				dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
> -						block_obj->ras_comm.name);
> -		}
> +	if (!block_obj || !block_obj->hw_ops)
> +		return;
>   
> -		if (poison_stat && block_obj->hw_ops->handle_poison_consumption) {
> -			poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
> -			need_reset = poison_stat;
> +	/* both query_poison_status and handle_poison_consumption are optional,
> +	 * but at least one of them should be implemented if we need poison
> +	 * consumption handler
> +	 */
> +	if (block_obj->hw_ops->query_poison_status) {
> +		poison_stat = block_obj->hw_ops->query_poison_status(adev);
> +		if (!poison_stat) {
> +			/* Not poison consumption interrupt, no need to handle it */
> +			dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
> +					block_obj->ras_comm.name);
> +
> +			return;
>   		}
>   	}
>   
> -	/* gpu reset is fallback for all failed cases */
> -	if (need_reset)
> +	if (!adev->gmc.xgmi.connected_to_cpu)
> +		amdgpu_umc_poison_handler(adev, &err_data, false);
> +
> +	if (block_obj->hw_ops->handle_poison_consumption)
> +		poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
> +
> +	/* gpu reset is fallback for failed and default cases */
> +	if (poison_stat) {
> +		dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n",
> +				block_obj->ras_comm.name);
>   		amdgpu_ras_reset_gpu(adev);
> +	}
>   }
>   
>   static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-05-10 10:15 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-10  6:29 [PATCH 1/2] drm/amdgpu: enable RAS IH for poison consumption Tao Zhou
2022-05-10  6:29 ` [PATCH 2/2] drm/amdgpu: refine RAS poison consumption handler Tao Zhou
2022-05-10 10:14   ` Lazar, Lijo
2022-05-10  6:36 ` [PATCH 1/2] drm/amdgpu: enable RAS IH for poison consumption Ziya, Mohammad zafar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.