All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RESEND] drm/amdgpu: Remove the vega10 from ras support list
@ 2022-01-27 11:47 majun
  2022-01-27 14:36 ` Chen, Guchun
  0 siblings, 1 reply; 3+ messages in thread
From: majun @ 2022-01-27 11:47 UTC (permalink / raw)
  To: amd-gfx, brahma_sw_dev; +Cc: Tao.Zhou1, Hawking.Zhang

Remove vega10 from the ras support check function.
Base on this change, the ras initial function is optimized.

Signed-off-by: majun <majun@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 38 +++++++++++++------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 37e9b7e82993..aa1de974e07e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2129,8 +2129,7 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 
 static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
 {
-	return adev->asic_type == CHIP_VEGA10 ||
-		adev->asic_type == CHIP_VEGA20 ||
+	return adev->asic_type == CHIP_VEGA20 ||
 		adev->asic_type == CHIP_ARCTURUS ||
 		adev->asic_type == CHIP_ALDEBARAN ||
 		adev->asic_type == CHIP_SIENNA_CICHLID;
@@ -2164,13 +2163,13 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
  * we have to initialize ras as normal. but need check if operation is
  * allowed or not in each function.
  */
-static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
+static bool amdgpu_ras_check_supported(struct amdgpu_device *adev)
 {
 	adev->ras_hw_enabled = adev->ras_enabled = 0;
 
 	if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
 	    !amdgpu_ras_asic_supported(adev))
-		return;
+		return false;
 
 	if (!adev->gmc.xgmi.connected_to_cpu) {
 		if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
@@ -2203,6 +2202,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 
 	adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
 		adev->ras_hw_enabled & amdgpu_ras_mask;
+
+	return true;
 }
 
 static void amdgpu_ras_counte_dw(struct work_struct *work)
@@ -2236,6 +2237,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	int r;
 	bool df_poison, umc_poison;
 
+	if (!amdgpu_ras_check_supported(adev))
+		return -EINVAL;
+
 	if (con)
 		return 0;
 
@@ -2250,28 +2254,24 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw);
 	atomic_set(&con->ras_ce_count, 0);
 	atomic_set(&con->ras_ue_count, 0);
-
 	con->objs = (struct ras_manager *)(con + 1);
+	con->features = 0;
 
 	amdgpu_ras_set_context(adev, con);
 
-	amdgpu_ras_check_supported(adev);
-
-	if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
-		/* set gfx block ras context feature for VEGA20 Gaming
-		 * send ras disable cmd to ras ta during ras late init.
-		 */
-		if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
+	if (!adev->ras_enabled) {
+	/* set gfx block ras context feature for VEGA20 Gaming
+	 * send ras disable cmd to ras ta during ras late init.
+	 */
+		if (adev->asic_type == CHIP_VEGA20) {
 			con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
-
 			return 0;
+		} else {
+			r = 0;
+			goto release_con;
 		}
-
-		r = 0;
-		goto release_con;
 	}
 
-	con->features = 0;
 	INIT_LIST_HEAD(&con->head);
 	/* Might need get this flag from vbios. */
 	con->flags = RAS_DEFAULT_FLAGS;
@@ -2545,7 +2545,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 
 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
 {
-	amdgpu_ras_check_supported(adev);
+	if (!amdgpu_ras_check_supported(adev))
+		return;
+
 	if (!adev->ras_hw_enabled)
 		return;
 
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [PATCH RESEND] drm/amdgpu: Remove the vega10 from ras support list
  2022-01-27 11:47 [PATCH RESEND] drm/amdgpu: Remove the vega10 from ras support list majun
@ 2022-01-27 14:36 ` Chen, Guchun
  2022-01-28 11:24   ` Ma, Jun
  0 siblings, 1 reply; 3+ messages in thread
From: Chen, Guchun @ 2022-01-27 14:36 UTC (permalink / raw)
  To: Ma, Jun, amd-gfx, brahma_sw_dev; +Cc: Zhou1, Tao, Zhang, Hawking

[AMD Official Use Only]

Hi Jun,

In RAS code, we have this special handling for Vega10. Can you elaborate it please? Any problem you have observed?

Regards,
Guchun

-----Original Message-----
From: Ma, Jun <Jun.Ma2@amd.com> 
Sent: Thursday, January 27, 2022 7:47 PM
To: amd-gfx@lists.freedesktop.org; brahma_sw_dev <brahma_sw_dev@amd.com>
Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Ma, Jun <Jun.Ma2@amd.com>
Subject: [PATCH RESEND] drm/amdgpu: Remove the vega10 from ras support list

Remove vega10 from the ras support check function.
Base on this change, the ras initial function is optimized.

Signed-off-by: majun <majun@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 38 +++++++++++++------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 37e9b7e82993..aa1de974e07e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2129,8 +2129,7 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
 
 static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)  {
-	return adev->asic_type == CHIP_VEGA10 ||
-		adev->asic_type == CHIP_VEGA20 ||
+	return adev->asic_type == CHIP_VEGA20 ||
 		adev->asic_type == CHIP_ARCTURUS ||
 		adev->asic_type == CHIP_ALDEBARAN ||
 		adev->asic_type == CHIP_SIENNA_CICHLID; @@ -2164,13 +2163,13 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
  * we have to initialize ras as normal. but need check if operation is
  * allowed or not in each function.
  */
-static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
+static bool amdgpu_ras_check_supported(struct amdgpu_device *adev)
 {
 	adev->ras_hw_enabled = adev->ras_enabled = 0;
 
 	if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
 	    !amdgpu_ras_asic_supported(adev))
-		return;
+		return false;
 
 	if (!adev->gmc.xgmi.connected_to_cpu) {
 		if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { @@ -2203,6 +2202,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 
 	adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
 		adev->ras_hw_enabled & amdgpu_ras_mask;
+
+	return true;
 }
 
 static void amdgpu_ras_counte_dw(struct work_struct *work) @@ -2236,6 +2237,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	int r;
 	bool df_poison, umc_poison;
 
+	if (!amdgpu_ras_check_supported(adev))
+		return -EINVAL;
+
 	if (con)
 		return 0;
 
@@ -2250,28 +2254,24 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
 	INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw);
 	atomic_set(&con->ras_ce_count, 0);
 	atomic_set(&con->ras_ue_count, 0);
-
 	con->objs = (struct ras_manager *)(con + 1);
+	con->features = 0;
 
 	amdgpu_ras_set_context(adev, con);
 
-	amdgpu_ras_check_supported(adev);
-
-	if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
-		/* set gfx block ras context feature for VEGA20 Gaming
-		 * send ras disable cmd to ras ta during ras late init.
-		 */
-		if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
+	if (!adev->ras_enabled) {
+	/* set gfx block ras context feature for VEGA20 Gaming
+	 * send ras disable cmd to ras ta during ras late init.
+	 */
+		if (adev->asic_type == CHIP_VEGA20) {
 			con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
-
 			return 0;
+		} else {
+			r = 0;
+			goto release_con;
 		}
-
-		r = 0;
-		goto release_con;
 	}
 
-	con->features = 0;
 	INIT_LIST_HEAD(&con->head);
 	/* Might need get this flag from vbios. */
 	con->flags = RAS_DEFAULT_FLAGS;
@@ -2545,7 +2545,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
 
 void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)  {
-	amdgpu_ras_check_supported(adev);
+	if (!amdgpu_ras_check_supported(adev))
+		return;
+
 	if (!adev->ras_hw_enabled)
 		return;
 
--
2.25.1

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH RESEND] drm/amdgpu: Remove the vega10 from ras support list
  2022-01-27 14:36 ` Chen, Guchun
@ 2022-01-28 11:24   ` Ma, Jun
  0 siblings, 0 replies; 3+ messages in thread
From: Ma, Jun @ 2022-01-28 11:24 UTC (permalink / raw)
  To: Chen, Guchun, Ma, Jun, amd-gfx, brahma_sw_dev; +Cc: Zhou1, Tao, Zhang, Hawking


On 1/27/2022 10:36 PM, Chen, Guchun wrote:
> [AMD Official Use Only]
> 
> Hi Jun,
> 
> In RAS code, we have this special handling for Vega10. Can you elaborate it please? Any problem you have observed?

Ok, thanks for review. I'll confirm this.

> 
> Regards,
> Guchun
> 
> -----Original Message-----
> From: Ma, Jun <Jun.Ma2@amd.com> 
> Sent: Thursday, January 27, 2022 7:47 PM
> To: amd-gfx@lists.freedesktop.org; brahma_sw_dev <brahma_sw_dev@amd.com>
> Cc: Zhang, Hawking <Hawking.Zhang@amd.com>; Zhou1, Tao <Tao.Zhou1@amd.com>; Ma, Jun <Jun.Ma2@amd.com>
> Subject: [PATCH RESEND] drm/amdgpu: Remove the vega10 from ras support list
> 
> Remove vega10 from the ras support check function.
> Base on this change, the ras initial function is optimized.
> 
> Signed-off-by: majun <majun@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 38 +++++++++++++------------
>  1 file changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> index 37e9b7e82993..aa1de974e07e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
> @@ -2129,8 +2129,7 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
>  
>  static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)  {
> -	return adev->asic_type == CHIP_VEGA10 ||
> -		adev->asic_type == CHIP_VEGA20 ||
> +	return adev->asic_type == CHIP_VEGA20 ||
>  		adev->asic_type == CHIP_ARCTURUS ||
>  		adev->asic_type == CHIP_ALDEBARAN ||
>  		adev->asic_type == CHIP_SIENNA_CICHLID; @@ -2164,13 +2163,13 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
>   * we have to initialize ras as normal. but need check if operation is
>   * allowed or not in each function.
>   */
> -static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
> +static bool amdgpu_ras_check_supported(struct amdgpu_device *adev)
>  {
>  	adev->ras_hw_enabled = adev->ras_enabled = 0;
>  
>  	if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
>  	    !amdgpu_ras_asic_supported(adev))
> -		return;
> +		return false;
>  
>  	if (!adev->gmc.xgmi.connected_to_cpu) {
>  		if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { @@ -2203,6 +2202,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
>  
>  	adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
>  		adev->ras_hw_enabled & amdgpu_ras_mask;
> +
> +	return true;
>  }
>  
>  static void amdgpu_ras_counte_dw(struct work_struct *work) @@ -2236,6 +2237,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>  	int r;
>  	bool df_poison, umc_poison;
>  
> +	if (!amdgpu_ras_check_supported(adev))
> +		return -EINVAL;
> +
>  	if (con)
>  		return 0;
>  
> @@ -2250,28 +2254,24 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
>  	INIT_DELAYED_WORK(&con->ras_counte_delay_work, amdgpu_ras_counte_dw);
>  	atomic_set(&con->ras_ce_count, 0);
>  	atomic_set(&con->ras_ue_count, 0);
> -
>  	con->objs = (struct ras_manager *)(con + 1);
> +	con->features = 0;
>  
>  	amdgpu_ras_set_context(adev, con);
>  
> -	amdgpu_ras_check_supported(adev);
> -
> -	if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
> -		/* set gfx block ras context feature for VEGA20 Gaming
> -		 * send ras disable cmd to ras ta during ras late init.
> -		 */
> -		if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
> +	if (!adev->ras_enabled) {
> +	/* set gfx block ras context feature for VEGA20 Gaming
> +	 * send ras disable cmd to ras ta during ras late init.
> +	 */
> +		if (adev->asic_type == CHIP_VEGA20) {
>  			con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
> -
>  			return 0;
> +		} else {
> +			r = 0;
> +			goto release_con;
>  		}
> -
> -		r = 0;
> -		goto release_con;
>  	}
>  
> -	con->features = 0;
>  	INIT_LIST_HEAD(&con->head);
>  	/* Might need get this flag from vbios. */
>  	con->flags = RAS_DEFAULT_FLAGS;
> @@ -2545,7 +2545,9 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
>  
>  void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)  {
> -	amdgpu_ras_check_supported(adev);
> +	if (!amdgpu_ras_check_supported(adev))
> +		return;
> +
>  	if (!adev->ras_hw_enabled)
>  		return;
>  
> --
> 2.25.1

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-01-28 13:39 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-27 11:47 [PATCH RESEND] drm/amdgpu: Remove the vega10 from ras support list majun
2022-01-27 14:36 ` Chen, Guchun
2022-01-28 11:24   ` Ma, Jun

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.