* [PATCH 7/7] drm/amdgpu: recover vram bo from shadow after gpu reset V2
@ 2016-08-18 11:17 Chunming Zhou
[not found] ` <1471519071-30545-1-git-send-email-David1.Zhou-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 3+ messages in thread
From: Chunming Zhou @ 2016-08-18 11:17 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Chunming Zhou
V2:
1. don't directly submit too many jobs at the same time.
2. delete unrelated printk.
Change-Id: I963598ba6eb44bc8620d70e026c0175d1a1de120
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 +++++++++++++++++++++++++++++-
1 file changed, 57 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c9b15c0..0bedb26 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2120,6 +2120,35 @@ bool amdgpu_need_backup(struct amdgpu_device *adev)
return amdgpu_lockup_timeout > 0 ? true : false;
}
+static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_bo *bo,
+ struct fence **fence)
+{
+ uint32_t domain;
+ int r;
+
+ if (!bo->shadow)
+ return 0;
+
+ r = amdgpu_bo_reserve(bo, false);
+ if (r)
+ return r;
+ domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+ /* if bo has been evicted, then no need to recover */
+ if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+ r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
+ NULL, fence, true);
+ if (r) {
+ DRM_ERROR("recover page table failed!\n");
+ goto err;
+ }
+ }
+err:
+ amdgpu_bo_unreserve(bo);
+ return r;
+}
+
/**
* amdgpu_gpu_reset - reset the asic
*
@@ -2202,13 +2231,40 @@ retry:
if (r) {
dev_err(adev->dev, "ib ring test failed (%d).\n", r);
r = amdgpu_suspend(adev);
+ need_full_reset = true;
goto retry;
}
-
+ /**
+ * recover vm page tables, since we cannot depend on VRAM being
+ * consistent after a full gpu reset.
+ */
+ if (need_full_reset && amdgpu_need_backup(adev)) {
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_bo *bo, *tmp;
+ struct fence *fence = NULL, *next = NULL;
+
+ DRM_INFO("recover vram bo from shadow\n");
+ mutex_lock(&adev->shadow_list_lock);
+ list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+ amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
+ if (fence)
+ fence_wait(fence, false);
+ fence_put(fence);
+ fence = next;
+ }
+ mutex_unlock(&adev->shadow_list_lock);
+ if (fence) {
+ r = fence_wait(fence, false);
+ if (r)
+ WARN(r, "recovery from shadow isn't comleted\n");
+ }
+ fence_put(fence);
+ }
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring)
continue;
+
amd_sched_job_recovery(&ring->sched);
kthread_unpark(ring->sched.thread);
}
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH 7/7] drm/amdgpu: recover vram bo from shadow after gpu reset V2
[not found] ` <1471519071-30545-1-git-send-email-David1.Zhou-5C7GfCeVMHo@public.gmane.org>
@ 2016-08-18 11:58 ` Christian König
[not found] ` <78fefefd-b088-3923-e8a1-5a02bcb2a406-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
0 siblings, 1 reply; 3+ messages in thread
From: Christian König @ 2016-08-18 11:58 UTC (permalink / raw)
To: Chunming Zhou, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 18.08.2016 um 13:17 schrieb Chunming Zhou:
> V2:
> 1. don't directly submit too many jobs at the same time.
> 2. delete unrelated printk.
>
> Change-Id: I963598ba6eb44bc8620d70e026c0175d1a1de120
> Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58 +++++++++++++++++++++++++++++-
> 1 file changed, 57 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index c9b15c0..0bedb26 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2120,6 +2120,35 @@ bool amdgpu_need_backup(struct amdgpu_device *adev)
> return amdgpu_lockup_timeout > 0 ? true : false;
> }
>
> +static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
> + struct amdgpu_ring *ring,
> + struct amdgpu_bo *bo,
> + struct fence **fence)
> +{
> + uint32_t domain;
> + int r;
> +
> + if (!bo->shadow)
> + return 0;
> +
> + r = amdgpu_bo_reserve(bo, false);
> + if (r)
> + return r;
> + domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> + /* if bo has been evicted, then no need to recover */
> + if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> + r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
> + NULL, fence, true);
> + if (r) {
> + DRM_ERROR("recover page table failed!\n");
> + goto err;
> + }
> + }
> +err:
> + amdgpu_bo_unreserve(bo);
> + return r;
> +}
> +
> /**
> * amdgpu_gpu_reset - reset the asic
> *
> @@ -2202,13 +2231,40 @@ retry:
> if (r) {
> dev_err(adev->dev, "ib ring test failed (%d).\n", r);
> r = amdgpu_suspend(adev);
> + need_full_reset = true;
> goto retry;
> }
> -
> + /**
> + * recover vm page tables, since we cannot depend on VRAM being
> + * consistent after a full gpu reset.
> + */
> + if (need_full_reset && amdgpu_need_backup(adev)) {
> + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> + struct amdgpu_bo *bo, *tmp;
> + struct fence *fence = NULL, *next = NULL;
> +
> + DRM_INFO("recover vram bo from shadow\n");
> + mutex_lock(&adev->shadow_list_lock);
> + list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
> + amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
> + if (fence)
> + fence_wait(fence, false);
You should check the return code here as well and abort the loop if
anything fails.
With that fixed the patch is Reviewed-by: Christian König
<christian.koenig@amd.com> as well.
Finally done with that!
Cheers,
Christian.
> + fence_put(fence);
> + fence = next;
> + }
> + mutex_unlock(&adev->shadow_list_lock);
> + if (fence) {
> + r = fence_wait(fence, false);
> + if (r)
> + WARN(r, "recovery from shadow isn't comleted\n");
> + }
> + fence_put(fence);
> + }
> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> struct amdgpu_ring *ring = adev->rings[i];
> if (!ring)
> continue;
> +
> amd_sched_job_recovery(&ring->sched);
> kthread_unpark(ring->sched.thread);
> }
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH 7/7] drm/amdgpu: recover vram bo from shadow after gpu reset V2
[not found] ` <78fefefd-b088-3923-e8a1-5a02bcb2a406-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2016-08-19 1:54 ` zhoucm1
0 siblings, 0 replies; 3+ messages in thread
From: zhoucm1 @ 2016-08-19 1:54 UTC (permalink / raw)
To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On 2016年08月18日 19:58, Christian König wrote:
> Am 18.08.2016 um 13:17 schrieb Chunming Zhou:
>> V2:
>> 1. don't directly submit too many jobs at the same time.
>> 2. delete unrelated printk.
>>
>> Change-Id: I963598ba6eb44bc8620d70e026c0175d1a1de120
>> Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 58
>> +++++++++++++++++++++++++++++-
>> 1 file changed, 57 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index c9b15c0..0bedb26 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -2120,6 +2120,35 @@ bool amdgpu_need_backup(struct amdgpu_device
>> *adev)
>> return amdgpu_lockup_timeout > 0 ? true : false;
>> }
>> +static int amdgpu_recover_vram_from_shadow(struct amdgpu_device
>> *adev,
>> + struct amdgpu_ring *ring,
>> + struct amdgpu_bo *bo,
>> + struct fence **fence)
>> +{
>> + uint32_t domain;
>> + int r;
>> +
>> + if (!bo->shadow)
>> + return 0;
>> +
>> + r = amdgpu_bo_reserve(bo, false);
>> + if (r)
>> + return r;
>> + domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
>> + /* if bo has been evicted, then no need to recover */
>> + if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
>> + r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
>> + NULL, fence, true);
>> + if (r) {
>> + DRM_ERROR("recover page table failed!\n");
>> + goto err;
>> + }
>> + }
>> +err:
>> + amdgpu_bo_unreserve(bo);
>> + return r;
>> +}
>> +
>> /**
>> * amdgpu_gpu_reset - reset the asic
>> *
>> @@ -2202,13 +2231,40 @@ retry:
>> if (r) {
>> dev_err(adev->dev, "ib ring test failed (%d).\n", r);
>> r = amdgpu_suspend(adev);
>> + need_full_reset = true;
>> goto retry;
>> }
>> -
>> + /**
>> + * recover vm page tables, since we cannot depend on VRAM being
>> + * consistent after a full gpu reset.
>> + */
>> + if (need_full_reset && amdgpu_need_backup(adev)) {
>> + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> + struct amdgpu_bo *bo, *tmp;
>> + struct fence *fence = NULL, *next = NULL;
>> +
>> + DRM_INFO("recover vram bo from shadow\n");
>> + mutex_lock(&adev->shadow_list_lock);
>> + list_for_each_entry_safe(bo, tmp, &adev->shadow_list,
>> shadow_list) {
>> + amdgpu_recover_vram_from_shadow(adev, ring, bo, &next);
>> + if (fence)
>> + fence_wait(fence, false);
>
> You should check the return code here as well and abort the loop if
> anything fails.
>
> With that fixed the patch is Reviewed-by: Christian König
> <christian.koenig@amd.com> as well.
>
> Finally done with that!
Agreed, thanks very much for the review. Next will be 'recover gart table' :)
Cheers,
David Zhou
>
> Cheers,
> Christian.
>
>> + fence_put(fence);
>> + fence = next;
>> + }
>> + mutex_unlock(&adev->shadow_list_lock);
>> + if (fence) {
>> + r = fence_wait(fence, false);
>> + if (r)
>> + WARN(r, "recovery from shadow isn't comleted\n");
>> + }
>> + fence_put(fence);
>> + }
>> for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>> struct amdgpu_ring *ring = adev->rings[i];
>> if (!ring)
>> continue;
>> +
>> amd_sched_job_recovery(&ring->sched);
>> kthread_unpark(ring->sched.thread);
>> }
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2016-08-19 1:54 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-18 11:17 [PATCH 7/7] drm/amdgpu: recover vram bo from shadow after gpu reset V2 Chunming Zhou
[not found] ` <1471519071-30545-1-git-send-email-David1.Zhou-5C7GfCeVMHo@public.gmane.org>
2016-08-18 11:58 ` Christian König
[not found] ` <78fefefd-b088-3923-e8a1-5a02bcb2a406-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2016-08-19 1:54 ` zhoucm1
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.