All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu:put CSA unmap after sched_entity_fini
@ 2017-01-13  4:11 Monk Liu
       [not found] ` <1484280664-22845-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 4+ messages in thread
From: Monk Liu @ 2017-01-13  4:11 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

Otherwise the CSA may be unmapped while the gpu_scheduler is still
scheduling jobs, which triggers a VM fault on the CSA address.

Change-Id: Ib2e25ededf89bca44c764477dd2f9127024ca78c
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 --------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 8 ++++++++
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 45484c0..e13cdde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -694,14 +694,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	amdgpu_uvd_free_handles(adev, file_priv);
 	amdgpu_vce_free_handles(adev, file_priv);
 
-	if (amdgpu_sriov_vf(adev)) {
-		/* TODO: how to handle reserve failure */
-		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
-		amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va);
-		fpriv->vm.csa_bo_va = NULL;
-		amdgpu_bo_unreserve(adev->virt.csa_obj);
-	}
-
 	amdgpu_vm_fini(adev, &fpriv->vm);
 
 	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d05546e..94098bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1608,6 +1608,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
 
+	if (amdgpu_sriov_vf(adev)) {
+		/* TODO: how to handle reserve failure */
+		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
+		amdgpu_vm_bo_rmv(adev, vm->csa_bo_va);
+		vm->csa_bo_va = NULL;
+		amdgpu_bo_unreserve(adev->virt.csa_obj);
+	}
+
 	if (!RB_EMPTY_ROOT(&vm->va)) {
 		dev_err(adev->dev, "still active bo inside vm\n");
 	}
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdgpu:put CSA unmap after sched_entity_fini
       [not found] ` <1484280664-22845-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2017-01-13  9:25   ` Christian König
       [not found]     ` <dadee34c-ca4c-6a2a-8053-4bfdeb1466c3-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2017-01-13  9:25 UTC (permalink / raw)
  To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 13.01.2017 um 05:11 schrieb Monk Liu:
> otherwise CSA may unmapped before gpu_scheduler scheduling
> jobs and trigger VM fault on CSA address
>
> Change-Id: Ib2e25ededf89bca44c764477dd2f9127024ca78c
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Did you really run into an issue because of that?

Calling amdgpu_vm_bo_rmv() shouldn't affect the page tables nor already 
submitted command submissions in any way.

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 --------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 8 ++++++++
>   2 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 45484c0..e13cdde 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -694,14 +694,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>   	amdgpu_uvd_free_handles(adev, file_priv);
>   	amdgpu_vce_free_handles(adev, file_priv);
>   
> -	if (amdgpu_sriov_vf(adev)) {
> -		/* TODO: how to handle reserve failure */
> -		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
> -		amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va);
> -		fpriv->vm.csa_bo_va = NULL;
> -		amdgpu_bo_unreserve(adev->virt.csa_obj);
> -	}
> -
>   	amdgpu_vm_fini(adev, &fpriv->vm);
>   
>   	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d05546e..94098bc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1608,6 +1608,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   
>   	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
>   
> +	if (amdgpu_sriov_vf(adev)) {
> +		/* TODO: how to handle reserve failure */
> +		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
> +		amdgpu_vm_bo_rmv(adev, vm->csa_bo_va);
> +		vm->csa_bo_va = NULL;
> +		amdgpu_bo_unreserve(adev->virt.csa_obj);
> +	}
> +
>   	if (!RB_EMPTY_ROOT(&vm->va)) {
>   		dev_err(adev->dev, "still active bo inside vm\n");
>   	}


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* 答复: [PATCH] drm/amdgpu:put CSA unmap after sched_entity_fini
       [not found]     ` <dadee34c-ca4c-6a2a-8053-4bfdeb1466c3-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-01-13  9:56       ` Liu, Monk
       [not found]         ` <BY2PR1201MB11102468908DDFD083C45B5384780-O28G1zQ8oGliQkyLPkmea2rFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
  0 siblings, 1 reply; 4+ messages in thread
From: Liu, Monk @ 2017-01-13  9:56 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 3349 bytes --]

Calling amdgpu_vm_bo_rmv() alone does not cause this bug. However, in another branch for SR-IOV we not only call vm_bo_rmv() but also set csa_addr to NULL afterwards, so the NULL address is inserted into the RB, and when preemption occurs the CP backs up its snapshot to the NULL address.


Although in staging-4.9 we don't set csa_addr to NULL (because, as you suggested, we always use a hardcoded value/macro for the CSA address), logically it is still better to do the CSA unmapping after "sched_entity_fini", which is more reasonable ...


BR Monk

________________________________
发件人: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> 代表 Christian König <deathsimple@vodafone.de>
发送时间: 2017年1月13日 17:25:09
收件人: Liu, Monk; amd-gfx@lists.freedesktop.org
主题: Re: [PATCH] drm/amdgpu:put CSA unmap after sched_entity_fini

Am 13.01.2017 um 05:11 schrieb Monk Liu:
> otherwise CSA may unmapped before gpu_scheduler scheduling
> jobs and trigger VM fault on CSA address
>
> Change-Id: Ib2e25ededf89bca44c764477dd2f9127024ca78c
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>

Did you really run into an issue because of that?

Calling amdgpu_vm_bo_rmv() shouldn't affect the page tables nor already
submitted command submissions in any way.

Regards,
Christian.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 --------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 8 ++++++++
>   2 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 45484c0..e13cdde 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -694,14 +694,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>        amdgpu_uvd_free_handles(adev, file_priv);
>        amdgpu_vce_free_handles(adev, file_priv);
>
> -     if (amdgpu_sriov_vf(adev)) {
> -             /* TODO: how to handle reserve failure */
> -             BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
> -             amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va);
> -             fpriv->vm.csa_bo_va = NULL;
> -             amdgpu_bo_unreserve(adev->virt.csa_obj);
> -     }
> -
>        amdgpu_vm_fini(adev, &fpriv->vm);
>
>        idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index d05546e..94098bc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1608,6 +1608,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>
>        amd_sched_entity_fini(vm->entity.sched, &vm->entity);
>
> +     if (amdgpu_sriov_vf(adev)) {
> +             /* TODO: how to handle reserve failure */
> +             BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
> +             amdgpu_vm_bo_rmv(adev, vm->csa_bo_va);
> +             vm->csa_bo_va = NULL;
> +             amdgpu_bo_unreserve(adev->virt.csa_obj);
> +     }
> +
>        if (!RB_EMPTY_ROOT(&vm->va)) {
>                dev_err(adev->dev, "still active bo inside vm\n");
>        }


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[-- Attachment #1.2: Type: text/html, Size: 6069 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: 答复: [PATCH] drm/amdgpu:put CSA unmap after sched_entity_fini
       [not found]         ` <BY2PR1201MB11102468908DDFD083C45B5384780-O28G1zQ8oGliQkyLPkmea2rFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
@ 2017-01-13 10:23           ` Christian König
  0 siblings, 0 replies; 4+ messages in thread
From: Christian König @ 2017-01-13 10:23 UTC (permalink / raw)
  To: Liu, Monk, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 4109 bytes --]

Ah, in this case please separate the amdgpu_vm_bo_rmv() from setting 
csa_addr to NULL.

Because amdgpu_vm_bo_rmv() should come before amdgpu_vm_fini(), and that in 
turn should come before waiting for the scheduler, so that the MM knows 
that the memory is about to be freed.

Regards,
Christian.

Am 13.01.2017 um 10:56 schrieb Liu, Monk:
>
> only with amdgpu_vm_bo_rmv() won't has such bug, but in another branch 
> for sriov, we not only call vm_bo_rmv(), and we also set csa_addr to 
> NULL after it, so the NULL address is inserted in RB, and when 
> preemption occured, CP backup snapshot to NULL address.
>
>
> although in staging-4.9 we didn't set csa_addr to NULL (because as you 
> suggested we always use HARDCODE/MACRO for CSA address), but logically 
> we'd better put CSA unmapping stuffs behind "sched_entity_fini", which 
> is more reasonable ...
>
>
> BR Monk
>
> ------------------------------------------------------------------------
> *发件人:* amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> 代表 Christian 
> König <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
> *发送时间:* 2017年1月13日 17:25:09
> *收件人:* Liu, Monk; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> *主题:* Re: [PATCH] drm/amdgpu:put CSA unmap after sched_entity_fini
> Am 13.01.2017 um 05:11 schrieb Monk Liu:
> > otherwise CSA may unmapped before gpu_scheduler scheduling
> > jobs and trigger VM fault on CSA address
> >
> > Change-Id: Ib2e25ededf89bca44c764477dd2f9127024ca78c
> > Signed-off-by: Monk Liu <Monk.Liu-5C7GfCeVMHo@public.gmane.org>
>
> Did you really run into an issue because of that?
>
> Calling amdgpu_vm_bo_rmv() shouldn't affect the page tables nor already
> submitted command submissions in any way.
>
> Regards,
> Christian.
>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 8 --------
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 8 ++++++++
> >   2 files changed, 8 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index 45484c0..e13cdde 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -694,14 +694,6 @@ void amdgpu_driver_postclose_kms(struct 
> drm_device *dev,
> >        amdgpu_uvd_free_handles(adev, file_priv);
> >        amdgpu_vce_free_handles(adev, file_priv);
> >
> > -     if (amdgpu_sriov_vf(adev)) {
> > -             /* TODO: how to handle reserve failure */
> > - BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
> > -             amdgpu_vm_bo_rmv(adev, fpriv->vm.csa_bo_va);
> > -             fpriv->vm.csa_bo_va = NULL;
> > - amdgpu_bo_unreserve(adev->virt.csa_obj);
> > -     }
> > -
> >        amdgpu_vm_fini(adev, &fpriv->vm);
> >
> > idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index d05546e..94098bc 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -1608,6 +1608,14 @@ void amdgpu_vm_fini(struct amdgpu_device 
> *adev, struct amdgpu_vm *vm)
> >
> >        amd_sched_entity_fini(vm->entity.sched, &vm->entity);
> >
> > +     if (amdgpu_sriov_vf(adev)) {
> > +             /* TODO: how to handle reserve failure */
> > + BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, false));
> > +             amdgpu_vm_bo_rmv(adev, vm->csa_bo_va);
> > +             vm->csa_bo_va = NULL;
> > + amdgpu_bo_unreserve(adev->virt.csa_obj);
> > +     }
> > +
> >        if (!RB_EMPTY_ROOT(&vm->va)) {
> >                dev_err(adev->dev, "still active bo inside vm\n");
> >        }
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[-- Attachment #1.2: Type: text/html, Size: 8337 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2017-01-13 10:23 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-13  4:11 [PATCH] drm/amdgpu:put CSA unmap after sched_entity_fini Monk Liu
     [not found] ` <1484280664-22845-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2017-01-13  9:25   ` Christian König
     [not found]     ` <dadee34c-ca4c-6a2a-8053-4bfdeb1466c3-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-01-13  9:56       ` 答复: " Liu, Monk
     [not found]         ` <BY2PR1201MB11102468908DDFD083C45B5384780-O28G1zQ8oGliQkyLPkmea2rFom/aUZj6nBOFsp37pqbUKgpGm//BTAC/G2K4zDHf@public.gmane.org>
2017-01-13 10:23           ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.