All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset
@ 2018-08-22  4:39 Emily Deng
       [not found] ` <1534912762-30235-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Emily Deng @ 2018-08-22  4:39 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Emily Deng

During gpu reset, don't use kiq, as it will generate more TDRs.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index eec991f..fcdbacb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -331,15 +331,8 @@ signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 
-	/* don't wait anymore for gpu reset case because this way may
-	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
-	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
-	 * never return if we keep waiting in virt_kiq_rreg, which cause
-	 * gpu_recover() hang there.
-	 *
-	 * also don't wait anymore for IRQ context
-	 * */
-	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+	/* don't wait anymore for IRQ context */
+	if (r < 1 && in_interrupt())
 		goto failed_kiq;
 
 	might_sleep();
@@ -387,8 +380,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
 
 		if (adev->gfx.kiq.ring.ready &&
-		    (amdgpu_sriov_runtime(adev) ||
-		     !amdgpu_sriov_vf(adev))) {
+		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+		    !adev->in_gpu_reset) {
 			r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
 				hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
 			if (!r)
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3] drm/amdgpu: Don't use kiq in interrupt
       [not found] ` <1534912762-30235-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
@ 2018-08-22  4:39   ` Emily Deng
  2018-08-22  4:39   ` [PATCH 3/3] drm/amdgpu: Use warn to replace error report Emily Deng
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Emily Deng @ 2018-08-22  4:39 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Emily Deng

Don't use kiq in interrupt context, as it might sleep.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index fcdbacb..f49f5f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -331,12 +331,6 @@ signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 
-	/* don't wait anymore for IRQ context */
-	if (r < 1 && in_interrupt())
-		goto failed_kiq;
-
-	might_sleep();
-
 	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
 		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
 		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
@@ -381,7 +375,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
 
 		if (adev->gfx.kiq.ring.ready &&
 		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
-		    !adev->in_gpu_reset) {
+		    !adev->in_gpu_reset &&
+		    !in_interrupt()) {
 			r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
 				hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
 			if (!r)
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3] drm/amdgpu: Use warn to replace error report
       [not found] ` <1534912762-30235-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
  2018-08-22  4:39   ` [PATCH 2/3] drm/amdgpu: Don't use kiq in interrupt Emily Deng
@ 2018-08-22  4:39   ` Emily Deng
  2018-08-22 12:19   ` [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset Deng, Emily
  2018-08-22 12:23   ` Christian König
  3 siblings, 0 replies; 6+ messages in thread
From: Emily Deng @ 2018-08-22  4:39 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Emily Deng

When the kiq flush fails, it can fall back to the mmio flush, so report a
warning instead of an error.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index f49f5f3..6214ad3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -342,7 +342,7 @@ signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
 	return 0;
 
 failed_kiq:
-	pr_err("failed to invalidate tlb with kiq\n");
+	pr_warn("failed to invalidate tlb with kiq\n");
 	return r;
 }
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset
       [not found] ` <1534912762-30235-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
  2018-08-22  4:39   ` [PATCH 2/3] drm/amdgpu: Don't use kiq in interrupt Emily Deng
  2018-08-22  4:39   ` [PATCH 3/3] drm/amdgpu: Use warn to replace error report Emily Deng
@ 2018-08-22 12:19   ` Deng, Emily
  2018-08-22 12:23   ` Christian König
  3 siblings, 0 replies; 6+ messages in thread
From: Deng, Emily @ 2018-08-22 12:19 UTC (permalink / raw)
  To: Deng, Emily, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Ping......

>-----Original Message-----
>From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Emily
>Deng
>Sent: Wednesday, August 22, 2018 12:39 PM
>To: amd-gfx@lists.freedesktop.org
>Cc: Deng, Emily <Emily.Deng@amd.com>
>Subject: [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset
>
>When in gpu reset, don't use kiq, it will generate more TDR.
>
>Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 15 ++++-----------
> 1 file changed, 4 insertions(+), 11 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>index eec991f..fcdbacb 100644
>--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>@@ -331,15 +331,8 @@ signed long  amdgpu_kiq_reg_write_reg_wait(struct
>amdgpu_device *adev,
>
> 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>
>-	/* don't wait anymore for gpu reset case because this way may
>-	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
>-	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
>-	 * never return if we keep waiting in virt_kiq_rreg, which cause
>-	 * gpu_recover() hang there.
>-	 *
>-	 * also don't wait anymore for IRQ context
>-	 * */
>-	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>+	/* don't wait anymore for IRQ context */
>+	if (r < 1 && in_interrupt())
> 		goto failed_kiq;
>
> 	might_sleep();
>@@ -387,8 +380,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct
>amdgpu_device *adev,
> 		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>
> 		if (adev->gfx.kiq.ring.ready &&
>-		    (amdgpu_sriov_runtime(adev) ||
>-		     !amdgpu_sriov_vf(adev))) {
>+		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))
>&&
>+		    !adev->in_gpu_reset) {
> 			r = amdgpu_kiq_reg_write_reg_wait(adev, hub-
>>vm_inv_eng0_req + eng,
> 				hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
> 			if (!r)
>--
>2.7.4
>
>_______________________________________________
>amd-gfx mailing list
>amd-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset
       [not found] ` <1534912762-30235-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2018-08-22 12:19   ` [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset Deng, Emily
@ 2018-08-22 12:23   ` Christian König
       [not found]     ` <fb256e36-59ca-14f2-09ba-bc7c1d9eb478-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  3 siblings, 1 reply; 6+ messages in thread
From: Christian König @ 2018-08-22 12:23 UTC (permalink / raw)
  To: Emily Deng, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 22.08.2018 um 06:39 schrieb Emily Deng:
> When in gpu reset, don't use kiq, it will generate more TDR.
>
> Signed-off-by: Emily Deng <Emily.Deng@amd.com>

Patch #1 is Reviewed-by: Christian König <christian.koenig@amd.com>.

Patch #2 is actually not necessary since we should never flush the tlb from
interrupt context.

Patch #3: I would actually rather keep that as an error message because it
still means that something went wrong.

Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 15 ++++-----------
>   1 file changed, 4 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index eec991f..fcdbacb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -331,15 +331,8 @@ signed long  amdgpu_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
>   
>   	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>   
> -	/* don't wait anymore for gpu reset case because this way may
> -	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
> -	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
> -	 * never return if we keep waiting in virt_kiq_rreg, which cause
> -	 * gpu_recover() hang there.
> -	 *
> -	 * also don't wait anymore for IRQ context
> -	 * */
> -	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
> +	/* don't wait anymore for IRQ context */
> +	if (r < 1 && in_interrupt())
>   		goto failed_kiq;
>   
>   	might_sleep();
> @@ -387,8 +380,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
>   		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>   
>   		if (adev->gfx.kiq.ring.ready &&
> -		    (amdgpu_sriov_runtime(adev) ||
> -		     !amdgpu_sriov_vf(adev))) {
> +		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
> +		    !adev->in_gpu_reset) {
>   			r = amdgpu_kiq_reg_write_reg_wait(adev, hub->vm_inv_eng0_req + eng,
>   				hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
>   			if (!r)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset
       [not found]     ` <fb256e36-59ca-14f2-09ba-bc7c1d9eb478-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-08-22 12:28       ` Deng, Emily
  0 siblings, 0 replies; 6+ messages in thread
From: Deng, Emily @ 2018-08-22 12:28 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

>-----Original Message-----
>From: Christian König <ckoenig.leichtzumerken@gmail.com>
>Sent: Wednesday, August 22, 2018 8:24 PM
>To: Deng, Emily <Emily.Deng@amd.com>; amd-gfx@lists.freedesktop.org
>Subject: Re: [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset
>
>Am 22.08.2018 um 06:39 schrieb Emily Deng:
>> When in gpu reset, don't use kiq, it will generate more TDR.
>>
>> Signed-off-by: Emily Deng <Emily.Deng@amd.com>
>
>Patch #1 is Reviewed-by: Christian König <christian.koenig@amd.com>.
>
>Patch #2 actually not necessary since we should never flush the tlb from
>interrupt context.
Ok, if that constraint holds, then ignore the patch.
>
>Patch #3: I would actually rather keep that an error message cause it still means
>that something went wrong.
Ok, then ignore the patch.
>Christian.
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 15 ++++-----------
>>   1 file changed, 4 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> index eec991f..fcdbacb 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>> @@ -331,15 +331,8 @@ signed long  amdgpu_kiq_reg_write_reg_wait(struct
>> amdgpu_device *adev,
>>
>>   	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>>
>> -	/* don't wait anymore for gpu reset case because this way may
>> -	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
>> -	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
>> -	 * never return if we keep waiting in virt_kiq_rreg, which cause
>> -	 * gpu_recover() hang there.
>> -	 *
>> -	 * also don't wait anymore for IRQ context
>> -	 * */
>> -	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
>> +	/* don't wait anymore for IRQ context */
>> +	if (r < 1 && in_interrupt())
>>   		goto failed_kiq;
>>
>>   	might_sleep();
>> @@ -387,8 +380,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct
>amdgpu_device *adev,
>>   		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
>>
>>   		if (adev->gfx.kiq.ring.ready &&
>> -		    (amdgpu_sriov_runtime(adev) ||
>> -		     !amdgpu_sriov_vf(adev))) {
>> +		    (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))
>&&
>> +		    !adev->in_gpu_reset) {
>>   			r = amdgpu_kiq_reg_write_reg_wait(adev, hub-
>>vm_inv_eng0_req + eng,
>>   				hub->vm_inv_eng0_ack + eng, tmp, 1 << vmid);
>>   			if (!r)

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-08-22 12:28 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-22  4:39 [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset Emily Deng
     [not found] ` <1534912762-30235-1-git-send-email-Emily.Deng-5C7GfCeVMHo@public.gmane.org>
2018-08-22  4:39   ` [PATCH 2/3] drm/amdgpu: Don't use kiq in interrupt Emily Deng
2018-08-22  4:39   ` [PATCH 3/3] drm/amdgpu: Use warn to replace error report Emily Deng
2018-08-22 12:19   ` [PATCH 1/3] drm/amdgpu: Don't use kiq in gpu reset Deng, Emily
2018-08-22 12:23   ` Christian König
     [not found]     ` <fb256e36-59ca-14f2-09ba-bc7c1d9eb478-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-08-22 12:28       ` Deng, Emily

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.