All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 7/7] drm/amdgpu: recover vram bo from shadow after gpu reset V2
@ 2016-08-18 11:17 Chunming Zhou
       [not found] ` <1471519071-30545-1-git-send-email-David1.Zhou-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 3+ messages in thread
From: Chunming Zhou @ 2016-08-18 11:17 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Chunming Zhou

V2:
1. don't directly submit too many jobs at the same time.
2. delete unrelated printk.

Change-Id: I963598ba6eb44bc8620d70e026c0175d1a1de120
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 +++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c9b15c0..0bedb26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2120,6 +2120,35 @@ bool amdgpu_need_backup(struct amdgpu_device *adev)
 	return amdgpu_lockup_timeout > 0 ? true : false;
 }
 
+static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
+					   struct amdgpu_ring *ring,
+					   struct amdgpu_bo *bo,
+					   struct fence **fence)
+{
+	uint32_t domain;
+	int r;
+
+       if (!bo->shadow)
+               return 0;
+
+       r = amdgpu_bo_reserve(bo, false);
+       if (r)
+               return r;
+       domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+       /* if bo has been evicted, then no need to recover */
+       if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+               r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
+						 NULL, fence, true);
+               if (r) {
+                       DRM_ERROR("recover page table failed!\n");
+                       goto err;
+               }
+       }
+err:
+       amdgpu_bo_unreserve(bo);
+       return r;
+}
+
 /**
  * amdgpu_gpu_reset - reset the asic
  *
@@ -2202,13 +2231,40 @@ retry:
 		if (r) {
 			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
 			r = amdgpu_suspend(adev);
+			need_full_reset = true;
 			goto retry;
 		}
-
+		/**
+		 * recovery vm page tables, since we cannot depend on VRAM is
+		 * consistent after gpu full reset.
+		 */
+		if (need_full_reset && amdgpu_need_backup(adev)) {
+			struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+			struct amdgpu_bo *bo, *tmp;
+			struct fence *fence = NULL, *next = NULL;
+
+			DRM_INFO("recover vram bo from shadow\n");
+			mutex_lock(&adev->shadow_list_lock);
+			list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+				amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
+				if (fence)
+					fence_wait(fence, false);
+				fence_put(fence);
+				fence = next;
+			}
+			mutex_unlock(&adev->shadow_list_lock);
+			if (fence) {
+				r = fence_wait(fence, false);
+				if (r)
+					WARN(r, "recovery from shadow isn't comleted\n");
+			}
+			fence_put(fence);
+		}
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 			if (!ring)
 				continue;
+
 			amd_sched_job_recovery(&ring->sched);
 			kthread_unpark(ring->sched.thread);
 		}
-- 
1.9.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 7/7] drm/amdgpu: recover vram bo from shadow after gpu reset V2
       [not found] ` <1471519071-30545-1-git-send-email-David1.Zhou-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-18 11:58   ` Christian König
       [not found]     ` <78fefefd-b088-3923-e8a1-5a02bcb2a406-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 3+ messages in thread
From: Christian König @ 2016-08-18 11:58 UTC (permalink / raw)
  To: Chunming Zhou, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 18.08.2016 um 13:17 schrieb Chunming Zhou:
> V2:
> 1. don't directly submit to many jobs at the same time.
> 2. delete unrelated printk.
>
> Change-Id: I963598ba6eb44bc8620d70e026c0175d1a1de120
> Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 +++++++++++++++++++++++++++++-
>   1 file changed, 57 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index c9b15c0..0bedb26 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2120,6 +2120,35 @@ bool amdgpu_need_backup(struct amdgpu_device *adev)
>   	return amdgpu_lockup_timeout > 0 ? true : false;
>   }
>   
> +static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
> +					   struct amdgpu_ring *ring,
> +					   struct amdgpu_bo *bo,
> +					   struct fence **fence)
> +{
> +	uint32_t domain;
> +	int r;
> +
> +       if (!bo->shadow)
> +               return 0;
> +
> +       r = amdgpu_bo_reserve(bo, false);
> +       if (r)
> +               return r;
> +       domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> +       /* if bo has been evicted, then no need to recover */
> +       if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> +               r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
> +						 NULL, fence, true);
> +               if (r) {
> +                       DRM_ERROR("recover page table failed!\n");
> +                       goto err;
> +               }
> +       }
> +err:
> +       amdgpu_bo_unreserve(bo);
> +       return r;
> +}
> +
>   /**
>    * amdgpu_gpu_reset - reset the asic
>    *
> @@ -2202,13 +2231,40 @@ retry:
>   		if (r) {
>   			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
>   			r = amdgpu_suspend(adev);
> +			need_full_reset = true;
>   			goto retry;
>   		}
> -
> +		/**
> +		 * recovery vm page tables, since we cannot depend on VRAM is
> +		 * consistent after gpu full reset.
> +		 */
> +		if (need_full_reset && amdgpu_need_backup(adev)) {
> +			struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +			struct amdgpu_bo *bo, *tmp;
> +			struct fence *fence = NULL, *next = NULL;
> +
> +			DRM_INFO("recover vram bo from shadow\n");
> +			mutex_lock(&adev->shadow_list_lock);
> +			list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
> +				amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
> +				if (fence)
> +					fence_wait(fence, false);

You should check the return code here as well and abort the loop if 
anything fails.

With that fixed the patch is Reviewed-by: Christian König 
<christian.koenig@amd.com> as well.

Finally done with that!

Cheers,
Christian.

> +				fence_put(fence);
> +				fence = next;
> +			}
> +			mutex_unlock(&adev->shadow_list_lock);
> +			if (fence) {
> +				r = fence_wait(fence, false);
> +				if (r)
> +					WARN(r, "recovery from shadow isn't comleted\n");
> +			}
> +			fence_put(fence);
> +		}
>   		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>   			struct amdgpu_ring *ring = adev->rings[i];
>   			if (!ring)
>   				continue;
> +
>   			amd_sched_job_recovery(&ring->sched);
>   			kthread_unpark(ring->sched.thread);
>   		}


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 7/7] drm/amdgpu: recover vram bo from shadow after gpu reset V2
       [not found]     ` <78fefefd-b088-3923-e8a1-5a02bcb2a406-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-08-19  1:54       ` zhoucm1
  0 siblings, 0 replies; 3+ messages in thread
From: zhoucm1 @ 2016-08-19  1:54 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2016年08月18日 19:58, Christian König wrote:
> Am 18.08.2016 um 13:17 schrieb Chunming Zhou:
>> V2:
>> 1. don't directly submit to many jobs at the same time.
>> 2. delete unrelated printk.
>>
>> Change-Id: I963598ba6eb44bc8620d70e026c0175d1a1de120
>> Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 
>> +++++++++++++++++++++++++++++-
>>   1 file changed, 57 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index c9b15c0..0bedb26 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2120,6 +2120,35 @@ bool amdgpu_need_backup(struct amdgpu_device 
>> *adev)
>>       return amdgpu_lockup_timeout > 0 ? true : false;
>>   }
>>   +static int amdgpu_recover_vram_from_shadow(struct amdgpu_device 
>> *adev,
>> +                       struct amdgpu_ring *ring,
>> +                       struct amdgpu_bo *bo,
>> +                       struct fence **fence)
>> +{
>> +    uint32_t domain;
>> +    int r;
>> +
>> +       if (!bo->shadow)
>> +               return 0;
>> +
>> +       r = amdgpu_bo_reserve(bo, false);
>> +       if (r)
>> +               return r;
>> +       domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
>> +       /* if bo has been evicted, then no need to recover */
>> +       if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> +               r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
>> +                         NULL, fence, true);
>> +               if (r) {
>> +                       DRM_ERROR("recover page table failed!\n");
>> +                       goto err;
>> +               }
>> +       }
>> +err:
>> +       amdgpu_bo_unreserve(bo);
>> +       return r;
>> +}
>> +
>>   /**
>>    * amdgpu_gpu_reset - reset the asic
>>    *
>> @@ -2202,13 +2231,40 @@ retry:
>>           if (r) {
>>               dev_err(adev->dev, "ib ring test failed (%d).\n", r);
>>               r = amdgpu_suspend(adev);
>> +            need_full_reset = true;
>>               goto retry;
>>           }
>> -
>> +        /**
>> +         * recovery vm page tables, since we cannot depend on VRAM is
>> +         * consistent after gpu full reset.
>> +         */
>> +        if (need_full_reset && amdgpu_need_backup(adev)) {
>> +            struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> +            struct amdgpu_bo *bo, *tmp;
>> +            struct fence *fence = NULL, *next = NULL;
>> +
>> +            DRM_INFO("recover vram bo from shadow\n");
>> +            mutex_lock(&adev->shadow_list_lock);
>> +            list_for_each_entry_safe(bo, tmp, &adev->shadow_list, 
>> shadow_list) {
>> +                amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
>> +                if (fence)
>> +                    fence_wait(fence, false);
>
> You should check the return code here as well and abort the loop if 
> anything fails.
>
> With that fixed the patch is Reviewed-by: Christian König 
> <christian.koenig@amd.com> as well.
>
> Finally done with that!
Agreed, thanks very much for the review. Next will be 'recover gart table' :)

Cheers,
David Zhou

>
> Cheers,
> Christian.
>
>> +                fence_put(fence);
>> +                fence = next;
>> +            }
>> +            mutex_unlock(&adev->shadow_list_lock);
>> +            if (fence) {
>> +                r = fence_wait(fence, false);
>> +                if (r)
>> +                    WARN(r, "recovery from shadow isn't comleted\n");
>> +            }
>> +            fence_put(fence);
>> +        }
>>           for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>               struct amdgpu_ring *ring = adev->rings[i];
>>               if (!ring)
>>                   continue;
>> +
>>               amd_sched_job_recovery(&ring->sched);
>>               kthread_unpark(ring->sched.thread);
>>           }
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-08-19  1:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-18 11:17 [PATCH 7/7] drm/amdgpu: recover vram bo from shadow after gpu reset V2 Chunming Zhou
     [not found] ` <1471519071-30545-1-git-send-email-David1.Zhou-5C7GfCeVMHo@public.gmane.org>
2016-08-18 11:58   ` Christian König
     [not found]     ` <78fefefd-b088-3923-e8a1-5a02bcb2a406-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-08-19  1:54       ` zhoucm1

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.