All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu/gmc9: give more chance for tlb flush if failed(v2)
@ 2018-04-04  5:01 Emily Deng
       [not found] ` <1522818115-20328-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 2+ messages in thread
From: Emily Deng @ 2018-04-04  5:01 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Emily Deng, Monk Liu

Under SR-IOV, the CPU-based TLB flush sometimes times out within
the given 100ms period. Instead of letting it fail and continuing, we
can give it more chances by repeating the TLB flush on the failed VMHUB.

This could fix the large number of "Timeout waiting for VM flush ACK"
errors seen during the vk_encoder test.

v2: refine the code

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 50 ++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 503070f..44602d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -328,7 +328,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 {
 	/* Use register 17 for GART */
 	const unsigned eng = 17;
-	unsigned i, j;
+	unsigned retry = 3;
+	unsigned i, j, k;
 
 	spin_lock(&adev->gmc.invalidate_lock);
 
@@ -336,31 +337,36 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 		struct amdgpu_vmhub *hub = &adev->vmhub[i];
 		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
 
-		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+		for (k = 0; k < retry; ++k) {
 
-		/* Busy wait for ACK.*/
-		for (j = 0; j < 100; j++) {
-			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
-			tmp &= 1 << vmid;
-			if (tmp)
+			WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+
+			/* Busy wait for ACK.*/
+			for (j = 0; j < 100; j++) {
+				tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+				tmp &= 1 << vmid;
+				if (tmp)
+					break;
+				cpu_relax();
+			}
+			if (j < 100)
 				break;
-			cpu_relax();
-		}
-		if (j < 100)
-			continue;
-
-		/* Wait for ACK with a delay.*/
-		for (j = 0; j < adev->usec_timeout; j++) {
-			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
-			tmp &= 1 << vmid;
-			if (tmp)
+
+			/* Wait for ACK with a delay.*/
+			for (j = 0; j < adev->usec_timeout; j++) {
+				tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+				tmp &= 1 << vmid;
+				if (tmp)
+					break;
+				udelay(1);
+			}
+			if (j < adev->usec_timeout)
 				break;
-			udelay(1);
+			if (k == retry)
+				DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+			else
+				DRM_ERROR("Need one more try to write the VMHUB flush request!");
 		}
-		if (j < adev->usec_timeout)
-			continue;
-
-		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
 	}
 
 	spin_unlock(&adev->gmc.invalidate_lock);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] drm/amdgpu/gmc9: give more chance for tlb flush if failed(v2)
       [not found] ` <1522818115-20328-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
@ 2018-04-04  8:00   ` Christian König
  0 siblings, 0 replies; 2+ messages in thread
From: Christian König @ 2018-04-04  8:00 UTC (permalink / raw)
  To: Emily Deng, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Am 04.04.2018 um 07:01 schrieb Emily Deng:
> under SR-IOV sometimes CPU based tlb flush would timeout within
> the given 100ms period, instead let it fail and continue we can
> give it more chance to repeat the tlb flush on the failed VMHUB
>
> this could fix the massive "Timeout waiting for VM flush ACK"
> error during vk_encoder test.
>
> v2:refine the code
>
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

Acked-by: Christian König <christian.koenig@amd.com>

But that is still a rather ugly workaround; we should probably not
upstream it and wait for the RLC fix instead.

Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 50 ++++++++++++++++++++---------------
>   1 file changed, 28 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 503070f..44602d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -328,7 +328,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>   {
>   	/* Use register 17 for GART */
>   	const unsigned eng = 17;
> -	unsigned i, j;
> +	unsigned retry = 3;
> +	unsigned i, j, k;
>   
>   	spin_lock(&adev->gmc.invalidate_lock);
>   
> @@ -336,31 +337,36 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>   		struct amdgpu_vmhub *hub = &adev->vmhub[i];
>   		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>   
> -		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +		for (k = 0; k < retry; ++k) {
>   
> -		/* Busy wait for ACK.*/
> -		for (j = 0; j < 100; j++) {
> -			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> -			tmp &= 1 << vmid;
> -			if (tmp)
> +			WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
> +
> +			/* Busy wait for ACK.*/
> +			for (j = 0; j < 100; j++) {
> +				tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> +				tmp &= 1 << vmid;
> +				if (tmp)
> +					break;
> +				cpu_relax();
> +			}
> +			if (j < 100)
>   				break;
> -			cpu_relax();
> -		}
> -		if (j < 100)
> -			continue;
> -
> -		/* Wait for ACK with a delay.*/
> -		for (j = 0; j < adev->usec_timeout; j++) {
> -			tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> -			tmp &= 1 << vmid;
> -			if (tmp)
> +
> +			/* Wait for ACK with a delay.*/
> +			for (j = 0; j < adev->usec_timeout; j++) {
> +				tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
> +				tmp &= 1 << vmid;
> +				if (tmp)
> +					break;
> +				udelay(1);
> +			}
> +			if (j < adev->usec_timeout)
>   				break;
> -			udelay(1);
> +			if (k == retry)
> +				DRM_ERROR("Timeout waiting for VM flush ACK!\n");
> +			else
> +				DRM_ERROR("Need one more try to write the VMHUB flush request!");
>   		}
> -		if (j < adev->usec_timeout)
> -			continue;
> -
> -		DRM_ERROR("Timeout waiting for VM flush ACK!\n");
>   	}
>   
>   	spin_unlock(&adev->gmc.invalidate_lock);

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2018-04-04  8:00 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-04-04  5:01 [PATCH] drm/amdgpu/gmc9: give more chance for tlb flush if failed(v2) Emily Deng
     [not found] ` <1522818115-20328-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
2018-04-04  8:00   ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.