All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list
@ 2022-05-25  8:43 Lang Yu
  2022-05-25  8:45 ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Lang Yu @ 2022-05-25  8:43 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Felix Kuehling, Huang Rui, Lang Yu

DOORBELL and MMIO BOs never move once created.
No need to validate them after that.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 34ba9e776521..45de9cadd771 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -808,6 +808,10 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
 	struct ttm_validate_buffer *entry = &mem->validate_list;
 	struct amdgpu_bo *bo = mem->bo;
 
+	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
+		return;
+
 	INIT_LIST_HEAD(&entry->head);
 	entry->num_shared = 1;
 	entry->bo = &bo->tbo;
@@ -824,6 +828,10 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
 {
 	struct ttm_validate_buffer *bo_list_entry;
 
+	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
+				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
+		return;
+
 	bo_list_entry = &mem->validate_list;
 	mutex_lock(&process_info->lock);
 	list_del(&bo_list_entry->head);
@@ -1649,7 +1657,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	unsigned long bo_size = mem->bo->tbo.base.size;
 	struct kfd_mem_attachment *entry, *tmp;
 	struct bo_vm_reservation_context ctx;
-	struct ttm_validate_buffer *bo_list_entry;
 	unsigned int mapped_to_gpu_memory;
 	int ret;
 	bool is_imported = false;
@@ -1677,10 +1684,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	}
 
 	/* Make sure restore workers don't access the BO any more */
-	bo_list_entry = &mem->validate_list;
-	mutex_lock(&process_info->lock);
-	list_del(&bo_list_entry->head);
-	mutex_unlock(&process_info->lock);
+	remove_kgd_mem_from_kfd_bo_list(mem, process_info);
 
 	/* No more MMU notifiers */
 	amdgpu_mn_unregister(mem->bo);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list
  2022-05-25  8:43 [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list Lang Yu
@ 2022-05-25  8:45 ` Christian König
  2022-05-25  9:25   ` Lang Yu
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2022-05-25  8:45 UTC (permalink / raw)
  To: Lang Yu, amd-gfx; +Cc: Alex Deucher, Felix Kuehling, Huang Rui

Am 25.05.22 um 10:43 schrieb Lang Yu:
> DOORBELL and MMIO BOs never move once created.
> No need to validate them after that.

Yeah, but you still need to make sure their page tables are up to date.

So this might break horribly.

Christian.

>
> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 +++++++++-----
>   1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 34ba9e776521..45de9cadd771 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -808,6 +808,10 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
>   	struct ttm_validate_buffer *entry = &mem->validate_list;
>   	struct amdgpu_bo *bo = mem->bo;
>   
> +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> +		return;
> +
>   	INIT_LIST_HEAD(&entry->head);
>   	entry->num_shared = 1;
>   	entry->bo = &bo->tbo;
> @@ -824,6 +828,10 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
>   {
>   	struct ttm_validate_buffer *bo_list_entry;
>   
> +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> +		return;
> +
>   	bo_list_entry = &mem->validate_list;
>   	mutex_lock(&process_info->lock);
>   	list_del(&bo_list_entry->head);
> @@ -1649,7 +1657,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   	unsigned long bo_size = mem->bo->tbo.base.size;
>   	struct kfd_mem_attachment *entry, *tmp;
>   	struct bo_vm_reservation_context ctx;
> -	struct ttm_validate_buffer *bo_list_entry;
>   	unsigned int mapped_to_gpu_memory;
>   	int ret;
>   	bool is_imported = false;
> @@ -1677,10 +1684,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>   	}
>   
>   	/* Make sure restore workers don't access the BO any more */
> -	bo_list_entry = &mem->validate_list;
> -	mutex_lock(&process_info->lock);
> -	list_del(&bo_list_entry->head);
> -	mutex_unlock(&process_info->lock);
> +	remove_kgd_mem_from_kfd_bo_list(mem, process_info);
>   
>   	/* No more MMU notifiers */
>   	amdgpu_mn_unregister(mem->bo);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list
  2022-05-25  8:45 ` Christian König
@ 2022-05-25  9:25   ` Lang Yu
  2022-05-25 10:37     ` Christian König
  0 siblings, 1 reply; 9+ messages in thread
From: Lang Yu @ 2022-05-25  9:25 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, Felix Kuehling, Huang Rui, amd-gfx

On 05/25/ , Christian König wrote:
> Am 25.05.22 um 10:43 schrieb Lang Yu:
> > DOORBELL and MMIO BOs never move once created.
> > No need to validate them after that.
> 
> Yeah, but you still need to make sure their page tables are up to date.
> 
> So this here might break horrible.

These BOs (and attachments) are validated when allocated and mapped.
Their page tables should be determined at this time. 

The kfd_bo_list is used to restore BOs after evictions.

Do you mean their page tables could be changed? Thanks.


> Christian.
> 
> > 
> > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 +++++++++-----
> >   1 file changed, 9 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > index 34ba9e776521..45de9cadd771 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > @@ -808,6 +808,10 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
> >   	struct ttm_validate_buffer *entry = &mem->validate_list;
> >   	struct amdgpu_bo *bo = mem->bo;
> > +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> > +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> > +		return;
> > +
> >   	INIT_LIST_HEAD(&entry->head);
> >   	entry->num_shared = 1;
> >   	entry->bo = &bo->tbo;
> > @@ -824,6 +828,10 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
> >   {
> >   	struct ttm_validate_buffer *bo_list_entry;
> > +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> > +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> > +		return;
> > +
> >   	bo_list_entry = &mem->validate_list;
> >   	mutex_lock(&process_info->lock);
> >   	list_del(&bo_list_entry->head);
> > @@ -1649,7 +1657,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> >   	unsigned long bo_size = mem->bo->tbo.base.size;
> >   	struct kfd_mem_attachment *entry, *tmp;
> >   	struct bo_vm_reservation_context ctx;
> > -	struct ttm_validate_buffer *bo_list_entry;
> >   	unsigned int mapped_to_gpu_memory;
> >   	int ret;
> >   	bool is_imported = false;
> > @@ -1677,10 +1684,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> >   	}
> >   	/* Make sure restore workers don't access the BO any more */
> > -	bo_list_entry = &mem->validate_list;
> > -	mutex_lock(&process_info->lock);
> > -	list_del(&bo_list_entry->head);
> > -	mutex_unlock(&process_info->lock);
> > +	remove_kgd_mem_from_kfd_bo_list(mem, process_info);
> >   	/* No more MMU notifiers */
> >   	amdgpu_mn_unregister(mem->bo);
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list
  2022-05-25  9:25   ` Lang Yu
@ 2022-05-25 10:37     ` Christian König
  2022-05-25 11:37       ` Lang Yu
  2022-05-25 22:17       ` Felix Kuehling
  0 siblings, 2 replies; 9+ messages in thread
From: Christian König @ 2022-05-25 10:37 UTC (permalink / raw)
  To: Lang Yu; +Cc: Alex Deucher, Felix Kuehling, Huang Rui, amd-gfx

Am 25.05.22 um 11:25 schrieb Lang Yu:
> On 05/25/ , Christian König wrote:
>> Am 25.05.22 um 10:43 schrieb Lang Yu:
>>> DOORBELL and MMIO BOs never move once created.
>>> No need to validate them after that.
>> Yeah, but you still need to make sure their page tables are up to date.
>>
>> So this here might break horrible.
> These BOs(and attachments) are validated when allocated and mapped.
> Their page tables should be determined at this time.
>
> The kfd_bo_list is used to restore BOs after evictions.
>
> Do you mean their page tabes could be changed? Thanks.

Yes, page tables can be destroyed under memory pressure as well.

Not sure how the KFD handles that, but in theory we should have every BO 
used by a process on the validation list. Even the ones pinned.

Regards,
Christian.

>
>
>> Christian.
>>
>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 +++++++++-----
>>>    1 file changed, 9 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index 34ba9e776521..45de9cadd771 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -808,6 +808,10 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
>>>    	struct ttm_validate_buffer *entry = &mem->validate_list;
>>>    	struct amdgpu_bo *bo = mem->bo;
>>> +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>>> +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
>>> +		return;
>>> +
>>>    	INIT_LIST_HEAD(&entry->head);
>>>    	entry->num_shared = 1;
>>>    	entry->bo = &bo->tbo;
>>> @@ -824,6 +828,10 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
>>>    {
>>>    	struct ttm_validate_buffer *bo_list_entry;
>>> +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>>> +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
>>> +		return;
>>> +
>>>    	bo_list_entry = &mem->validate_list;
>>>    	mutex_lock(&process_info->lock);
>>>    	list_del(&bo_list_entry->head);
>>> @@ -1649,7 +1657,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>    	unsigned long bo_size = mem->bo->tbo.base.size;
>>>    	struct kfd_mem_attachment *entry, *tmp;
>>>    	struct bo_vm_reservation_context ctx;
>>> -	struct ttm_validate_buffer *bo_list_entry;
>>>    	unsigned int mapped_to_gpu_memory;
>>>    	int ret;
>>>    	bool is_imported = false;
>>> @@ -1677,10 +1684,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>    	}
>>>    	/* Make sure restore workers don't access the BO any more */
>>> -	bo_list_entry = &mem->validate_list;
>>> -	mutex_lock(&process_info->lock);
>>> -	list_del(&bo_list_entry->head);
>>> -	mutex_unlock(&process_info->lock);
>>> +	remove_kgd_mem_from_kfd_bo_list(mem, process_info);
>>>    	/* No more MMU notifiers */
>>>    	amdgpu_mn_unregister(mem->bo);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list
  2022-05-25 10:37     ` Christian König
@ 2022-05-25 11:37       ` Lang Yu
  2022-05-25 11:43         ` Christian König
  2022-05-25 22:17       ` Felix Kuehling
  1 sibling, 1 reply; 9+ messages in thread
From: Lang Yu @ 2022-05-25 11:37 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, Felix Kuehling, Huang Rui, amd-gfx

On 05/25/ , Christian König wrote:
> Am 25.05.22 um 11:25 schrieb Lang Yu:
> > On 05/25/ , Christian König wrote:
> > > Am 25.05.22 um 10:43 schrieb Lang Yu:
> > > > DOORBELL and MMIO BOs never move once created.
> > > > No need to validate them after that.
> > > Yeah, but you still need to make sure their page tables are up to date.
> > > 
> > > So this here might break horrible.
> > These BOs(and attachments) are validated when allocated and mapped.
> > Their page tables should be determined at this time.
> > 
> > The kfd_bo_list is used to restore BOs after evictions.
> > 
> > Do you mean their page tabes could be changed? Thanks.
> 
> Yes, page tables can be destroyed under memory pressure as well.

Destroyed? Do you mean the contents of the page table BOs disappear?
If so, could other BOs be destroyed under memory pressure? Thanks!

Regards,
Lang

> Not sure how the KFD handles that, but in theory we should have every BO
> used by a process on the validation list. Even the ones pinned.
> 
> Regards,
> Christian.
> 
> > 
> > 
> > > Christian.
> > > 
> > > > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > > > ---
> > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 +++++++++-----
> > > >    1 file changed, 9 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > index 34ba9e776521..45de9cadd771 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > @@ -808,6 +808,10 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
> > > >    	struct ttm_validate_buffer *entry = &mem->validate_list;
> > > >    	struct amdgpu_bo *bo = mem->bo;
> > > > +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> > > > +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> > > > +		return;
> > > > +
> > > >    	INIT_LIST_HEAD(&entry->head);
> > > >    	entry->num_shared = 1;
> > > >    	entry->bo = &bo->tbo;
> > > > @@ -824,6 +828,10 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
> > > >    {
> > > >    	struct ttm_validate_buffer *bo_list_entry;
> > > > +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> > > > +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> > > > +		return;
> > > > +
> > > >    	bo_list_entry = &mem->validate_list;
> > > >    	mutex_lock(&process_info->lock);
> > > >    	list_del(&bo_list_entry->head);
> > > > @@ -1649,7 +1657,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> > > >    	unsigned long bo_size = mem->bo->tbo.base.size;
> > > >    	struct kfd_mem_attachment *entry, *tmp;
> > > >    	struct bo_vm_reservation_context ctx;
> > > > -	struct ttm_validate_buffer *bo_list_entry;
> > > >    	unsigned int mapped_to_gpu_memory;
> > > >    	int ret;
> > > >    	bool is_imported = false;
> > > > @@ -1677,10 +1684,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> > > >    	}
> > > >    	/* Make sure restore workers don't access the BO any more */
> > > > -	bo_list_entry = &mem->validate_list;
> > > > -	mutex_lock(&process_info->lock);
> > > > -	list_del(&bo_list_entry->head);
> > > > -	mutex_unlock(&process_info->lock);
> > > > +	remove_kgd_mem_from_kfd_bo_list(mem, process_info);
> > > >    	/* No more MMU notifiers */
> > > >    	amdgpu_mn_unregister(mem->bo);
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list
  2022-05-25 11:37       ` Lang Yu
@ 2022-05-25 11:43         ` Christian König
  2022-05-25 12:46           ` Lang Yu
  0 siblings, 1 reply; 9+ messages in thread
From: Christian König @ 2022-05-25 11:43 UTC (permalink / raw)
  To: Lang Yu; +Cc: Alex Deucher, Felix Kuehling, Huang Rui, amd-gfx

Am 25.05.22 um 13:37 schrieb Lang Yu:
> On 05/25/ , Christian König wrote:
>> Am 25.05.22 um 11:25 schrieb Lang Yu:
>>> On 05/25/ , Christian König wrote:
>>>> Am 25.05.22 um 10:43 schrieb Lang Yu:
>>>>> DOORBELL and MMIO BOs never move once created.
>>>>> No need to validate them after that.
>>>> Yeah, but you still need to make sure their page tables are up to date.
>>>>
>>>> So this here might break horrible.
>>> These BOs(and attachments) are validated when allocated and mapped.
>>> Their page tables should be determined at this time.
>>>
>>> The kfd_bo_list is used to restore BOs after evictions.
>>>
>>> Do you mean their page tabes could be changed? Thanks.
>> Yes, page tables can be destroyed under memory pressure as well.
> Destroyed? You mean the contents of page table BOs are disappeared.

Currently we try to just free up the backing store of them, but the idea 
is to really get rid of the whole BO under memory pressure.

See page tables are managed in a hierarchy and their content can be 
fully restored from the metadata.

So except for the root PD all page tables in a VM can (in theory) be 
destroyed and re-created when they are not used.

> If so, could other BOs be destroyed under memory pressure? Thanks!

I don't think so, everything else is just referenced somewhere.

Regards,
Christian.

>
> Regards,
> Lang
>
>> Not sure how the KFD handles that, but in theory we should have every BO
>> used by a process on the validation list. Even the ones pinned.
>>
>> Regards,
>> Christian.
>>
>>>
>>>> Christian.
>>>>
>>>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>>>> ---
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 +++++++++-----
>>>>>     1 file changed, 9 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> index 34ba9e776521..45de9cadd771 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> @@ -808,6 +808,10 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
>>>>>     	struct ttm_validate_buffer *entry = &mem->validate_list;
>>>>>     	struct amdgpu_bo *bo = mem->bo;
>>>>> +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>>>>> +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
>>>>> +		return;
>>>>> +
>>>>>     	INIT_LIST_HEAD(&entry->head);
>>>>>     	entry->num_shared = 1;
>>>>>     	entry->bo = &bo->tbo;
>>>>> @@ -824,6 +828,10 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
>>>>>     {
>>>>>     	struct ttm_validate_buffer *bo_list_entry;
>>>>> +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>>>>> +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
>>>>> +		return;
>>>>> +
>>>>>     	bo_list_entry = &mem->validate_list;
>>>>>     	mutex_lock(&process_info->lock);
>>>>>     	list_del(&bo_list_entry->head);
>>>>> @@ -1649,7 +1657,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>>     	unsigned long bo_size = mem->bo->tbo.base.size;
>>>>>     	struct kfd_mem_attachment *entry, *tmp;
>>>>>     	struct bo_vm_reservation_context ctx;
>>>>> -	struct ttm_validate_buffer *bo_list_entry;
>>>>>     	unsigned int mapped_to_gpu_memory;
>>>>>     	int ret;
>>>>>     	bool is_imported = false;
>>>>> @@ -1677,10 +1684,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>>     	}
>>>>>     	/* Make sure restore workers don't access the BO any more */
>>>>> -	bo_list_entry = &mem->validate_list;
>>>>> -	mutex_lock(&process_info->lock);
>>>>> -	list_del(&bo_list_entry->head);
>>>>> -	mutex_unlock(&process_info->lock);
>>>>> +	remove_kgd_mem_from_kfd_bo_list(mem, process_info);
>>>>>     	/* No more MMU notifiers */
>>>>>     	amdgpu_mn_unregister(mem->bo);


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list
  2022-05-25 11:43         ` Christian König
@ 2022-05-25 12:46           ` Lang Yu
  0 siblings, 0 replies; 9+ messages in thread
From: Lang Yu @ 2022-05-25 12:46 UTC (permalink / raw)
  To: Christian König; +Cc: Alex Deucher, Felix Kuehling, Huang Rui, amd-gfx

On 05/25/ , Christian König wrote:
> Am 25.05.22 um 13:37 schrieb Lang Yu:
> > On 05/25/ , Christian König wrote:
> > > Am 25.05.22 um 11:25 schrieb Lang Yu:
> > > > On 05/25/ , Christian König wrote:
> > > > > Am 25.05.22 um 10:43 schrieb Lang Yu:
> > > > > > DOORBELL and MMIO BOs never move once created.
> > > > > > No need to validate them after that.
> > > > > Yeah, but you still need to make sure their page tables are up to date.
> > > > > 
> > > > > So this here might break horrible.
> > > > These BOs(and attachments) are validated when allocated and mapped.
> > > > Their page tables should be determined at this time.
> > > > 
> > > > The kfd_bo_list is used to restore BOs after evictions.
> > > > 
> > > > Do you mean their page tabes could be changed? Thanks.
> > > Yes, page tables can be destroyed under memory pressure as well.
> > Destroyed? You mean the contents of page table BOs are disappeared.
> 
> Currently we try to just free up the backing store of them, but the idea is
> to really get rid of the whole BO under memory pressure.
> 
> See page tables are managed in a hierarchy and their content can be fully
> restored from the metadata.
> 
> So except for the root PD all page tables in a VM can (in theory) be
> destroyed and re-created when they are not used.
> 
> > If so, could other BOs be destroyed under memory pressure? Thanks!
> 
> I don't think so, everything else is just referenced somewhere.

Thanks, I got it. Just curious: how do we identify PT BOs when we want
to destroy them under memory pressure? And does this happen in the
eviction process?

Regards,
Lang


> Regards,
> Christian.
> 
> > 
> > Regards,
> > Lang
> > 
> > > Not sure how the KFD handles that, but in theory we should have every BO
> > > used by a process on the validation list. Even the ones pinned.
> > > 
> > > Regards,
> > > Christian.
> > > 
> > > > 
> > > > > Christian.
> > > > > 
> > > > > > Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> > > > > > ---
> > > > > >     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 +++++++++-----
> > > > > >     1 file changed, 9 insertions(+), 5 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > > > index 34ba9e776521..45de9cadd771 100644
> > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> > > > > > @@ -808,6 +808,10 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
> > > > > >     	struct ttm_validate_buffer *entry = &mem->validate_list;
> > > > > >     	struct amdgpu_bo *bo = mem->bo;
> > > > > > +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> > > > > > +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> > > > > > +		return;
> > > > > > +
> > > > > >     	INIT_LIST_HEAD(&entry->head);
> > > > > >     	entry->num_shared = 1;
> > > > > >     	entry->bo = &bo->tbo;
> > > > > > @@ -824,6 +828,10 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
> > > > > >     {
> > > > > >     	struct ttm_validate_buffer *bo_list_entry;
> > > > > > +	if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
> > > > > > +				KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
> > > > > > +		return;
> > > > > > +
> > > > > >     	bo_list_entry = &mem->validate_list;
> > > > > >     	mutex_lock(&process_info->lock);
> > > > > >     	list_del(&bo_list_entry->head);
> > > > > > @@ -1649,7 +1657,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> > > > > >     	unsigned long bo_size = mem->bo->tbo.base.size;
> > > > > >     	struct kfd_mem_attachment *entry, *tmp;
> > > > > >     	struct bo_vm_reservation_context ctx;
> > > > > > -	struct ttm_validate_buffer *bo_list_entry;
> > > > > >     	unsigned int mapped_to_gpu_memory;
> > > > > >     	int ret;
> > > > > >     	bool is_imported = false;
> > > > > > @@ -1677,10 +1684,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> > > > > >     	}
> > > > > >     	/* Make sure restore workers don't access the BO any more */
> > > > > > -	bo_list_entry = &mem->validate_list;
> > > > > > -	mutex_lock(&process_info->lock);
> > > > > > -	list_del(&bo_list_entry->head);
> > > > > > -	mutex_unlock(&process_info->lock);
> > > > > > +	remove_kgd_mem_from_kfd_bo_list(mem, process_info);
> > > > > >     	/* No more MMU notifiers */
> > > > > >     	amdgpu_mn_unregister(mem->bo);
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list
  2022-05-25 10:37     ` Christian König
  2022-05-25 11:37       ` Lang Yu
@ 2022-05-25 22:17       ` Felix Kuehling
  2022-05-30  9:57         ` Christian König
  1 sibling, 1 reply; 9+ messages in thread
From: Felix Kuehling @ 2022-05-25 22:17 UTC (permalink / raw)
  To: Christian König, Lang Yu; +Cc: Alex Deucher, Huang Rui, amd-gfx


On 2022-05-25 06:37, Christian König wrote:
> Am 25.05.22 um 11:25 schrieb Lang Yu:
>> On 05/25/ , Christian König wrote:
>>> Am 25.05.22 um 10:43 schrieb Lang Yu:
>>>> DOORBELL and MMIO BOs never move once created.
>>>> No need to validate them after that.
>>> Yeah, but you still need to make sure their page tables are up to date.
>>>
>>> So this here might break horrible.
>> These BOs(and attachments) are validated when allocated and mapped.
>> Their page tables should be determined at this time.
>>
>> The kfd_bo_list is used to restore BOs after evictions.
>>
>> Do you mean their page tabes could be changed? Thanks.
>
> Yes, page tables can be destroyed under memory pressure as well.

Is that actually happening today, or is that some future optimization 
you have in mind? I know page tables can get evicted, but I didn't think 
they were destroyed unless the memory at that address is unmapped (which 
never happens for pinned BOs).


>
> Not sure how the KFD handles that, but in theory we should have every 
> BO used by a process on the validation list. Even the ones pinned.

Then we already have some other broken cases for the small number of 
kmapped BOs that are pinned and currently removed from the validation 
list (see amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel).

Regards,
   Felix


>
> Regards,
> Christian.
>
>>
>>
>>> Christian.
>>>
>>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 
>>>> +++++++++-----
>>>>    1 file changed, 9 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> index 34ba9e776521..45de9cadd771 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> @@ -808,6 +808,10 @@ static void add_kgd_mem_to_kfd_bo_list(struct 
>>>> kgd_mem *mem,
>>>>        struct ttm_validate_buffer *entry = &mem->validate_list;
>>>>        struct amdgpu_bo *bo = mem->bo;
>>>> +    if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>>>> +                KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
>>>> +        return;
>>>> +
>>>>        INIT_LIST_HEAD(&entry->head);
>>>>        entry->num_shared = 1;
>>>>        entry->bo = &bo->tbo;
>>>> @@ -824,6 +828,10 @@ static void 
>>>> remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
>>>>    {
>>>>        struct ttm_validate_buffer *bo_list_entry;
>>>> +    if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>>>> +                KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
>>>> +        return;
>>>> +
>>>>        bo_list_entry = &mem->validate_list;
>>>>        mutex_lock(&process_info->lock);
>>>>        list_del(&bo_list_entry->head);
>>>> @@ -1649,7 +1657,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>        unsigned long bo_size = mem->bo->tbo.base.size;
>>>>        struct kfd_mem_attachment *entry, *tmp;
>>>>        struct bo_vm_reservation_context ctx;
>>>> -    struct ttm_validate_buffer *bo_list_entry;
>>>>        unsigned int mapped_to_gpu_memory;
>>>>        int ret;
>>>>        bool is_imported = false;
>>>> @@ -1677,10 +1684,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>        }
>>>>        /* Make sure restore workers don't access the BO any more */
>>>> -    bo_list_entry = &mem->validate_list;
>>>> -    mutex_lock(&process_info->lock);
>>>> -    list_del(&bo_list_entry->head);
>>>> -    mutex_unlock(&process_info->lock);
>>>> +    remove_kgd_mem_from_kfd_bo_list(mem, process_info);
>>>>        /* No more MMU notifiers */
>>>>        amdgpu_mn_unregister(mem->bo);
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list
  2022-05-25 22:17       ` Felix Kuehling
@ 2022-05-30  9:57         ` Christian König
  0 siblings, 0 replies; 9+ messages in thread
From: Christian König @ 2022-05-30  9:57 UTC (permalink / raw)
  To: Felix Kuehling, Lang Yu; +Cc: Alex Deucher, Huang Rui, amd-gfx

Am 26.05.22 um 00:17 schrieb Felix Kuehling:
>
> On 2022-05-25 06:37, Christian König wrote:
>> Am 25.05.22 um 11:25 schrieb Lang Yu:
>>> On 05/25/ , Christian König wrote:
>>>> Am 25.05.22 um 10:43 schrieb Lang Yu:
>>>>> DOORBELL and MMIO BOs never move once created.
>>>>> No need to validate them after that.
>>>> Yeah, but you still need to make sure their page tables are up to 
>>>> date.
>>>>
>>>> So this here might break horrible.
>>> These BOs(and attachments) are validated when allocated and mapped.
>>> Their page tables should be determined at this time.
>>>
>>> The kfd_bo_list is used to restore BOs after evictions.
>>>
>>> Do you mean their page tabes could be changed? Thanks.
>>
>> Yes, page tables can be destroyed under memory pressure as well.
>
> Is that actually happening today, or is that some future optimization 
> you have in mind? I know page tables can get evicted, but I didn't 
> think they were destroyed unless the memory at that address is 
> unmapped (which never happens for pinned BOs).

We partially already do that, but it is largely for future optimizations.

Currently page tables are moved to system memory and occupy system 
pages. But that's just utter nonsense because they are re-created 
anyway when they move back in.

>>
>> Not sure how the KFD handles that, but in theory we should have every 
>> BO used by a process on the validation list. Even the ones pinned.
>
> Then we already have some other broken cases for the small number of 
> kmapped BOs that are pinned and currently removed from the validation 
> list (see amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel).

That's indeed broken then and only works by coincidence (the pinned and 
unpinned BOs probably don't overlap an evicted page table).

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>>
>> Regards,
>> Christian.
>>
>>>
>>>
>>>> Christian.
>>>>
>>>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 
>>>>> +++++++++-----
>>>>>    1 file changed, 9 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> index 34ba9e776521..45de9cadd771 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> @@ -808,6 +808,10 @@ static void add_kgd_mem_to_kfd_bo_list(struct 
>>>>> kgd_mem *mem,
>>>>>        struct ttm_validate_buffer *entry = &mem->validate_list;
>>>>>        struct amdgpu_bo *bo = mem->bo;
>>>>> +    if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>>>>> +                KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
>>>>> +        return;
>>>>> +
>>>>>        INIT_LIST_HEAD(&entry->head);
>>>>>        entry->num_shared = 1;
>>>>>        entry->bo = &bo->tbo;
>>>>> @@ -824,6 +828,10 @@ static void 
>>>>> remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
>>>>>    {
>>>>>        struct ttm_validate_buffer *bo_list_entry;
>>>>> +    if (mem->alloc_flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
>>>>> +                KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))
>>>>> +        return;
>>>>> +
>>>>>        bo_list_entry = &mem->validate_list;
>>>>>        mutex_lock(&process_info->lock);
>>>>>        list_del(&bo_list_entry->head);
>>>>> @@ -1649,7 +1657,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>>        unsigned long bo_size = mem->bo->tbo.base.size;
>>>>>        struct kfd_mem_attachment *entry, *tmp;
>>>>>        struct bo_vm_reservation_context ctx;
>>>>> -    struct ttm_validate_buffer *bo_list_entry;
>>>>>        unsigned int mapped_to_gpu_memory;
>>>>>        int ret;
>>>>>        bool is_imported = false;
>>>>> @@ -1677,10 +1684,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>>>        }
>>>>>        /* Make sure restore workers don't access the BO any more */
>>>>> -    bo_list_entry = &mem->validate_list;
>>>>> -    mutex_lock(&process_info->lock);
>>>>> -    list_del(&bo_list_entry->head);
>>>>> -    mutex_unlock(&process_info->lock);
>>>>> +    remove_kgd_mem_from_kfd_bo_list(mem, process_info);
>>>>>        /* No more MMU notifiers */
>>>>>        amdgpu_mn_unregister(mem->bo);
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2022-05-30  9:57 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-25  8:43 [PATCH] drm/amdkfd: don't add DOORBELL and MMIO BOs to validate list Lang Yu
2022-05-25  8:45 ` Christian König
2022-05-25  9:25   ` Lang Yu
2022-05-25 10:37     ` Christian König
2022-05-25 11:37       ` Lang Yu
2022-05-25 11:43         ` Christian König
2022-05-25 12:46           ` Lang Yu
2022-05-25 22:17       ` Felix Kuehling
2022-05-30  9:57         ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.