All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] drm/amdkfd: reserve the BO before validating it
@ 2024-01-22  9:08 Lang Yu
  2024-01-24 21:40 ` Felix Kuehling
  0 siblings, 1 reply; 7+ messages in thread
From: Lang Yu @ 2024-01-22  9:08 UTC (permalink / raw)
  To: amd-gfx; +Cc: Felix Kuehling, Lang Yu, Francis David

Fixes: 410f08516e0f ("drm/amdkfd: Move dma unmapping after TLB flush")

v2:
Avoid unmapping attachment twice when ERESTARTSYS.

[   41.708711] WARNING: CPU: 0 PID: 1463 at drivers/gpu/drm/ttm/ttm_bo.c:846 ttm_bo_validate+0x146/0x1b0 [ttm]
[   41.708989] Call Trace:
[   41.708992]  <TASK>
[   41.708996]  ? show_regs+0x6c/0x80
[   41.709000]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
[   41.709008]  ? __warn+0x93/0x190
[   41.709014]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
[   41.709024]  ? report_bug+0x1f9/0x210
[   41.709035]  ? handle_bug+0x46/0x80
[   41.709041]  ? exc_invalid_op+0x1d/0x80
[   41.709048]  ? asm_exc_invalid_op+0x1f/0x30
[   41.709057]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80 [amdgpu]
[   41.709185]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
[   41.709197]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80 [amdgpu]
[   41.709337]  ? srso_alias_return_thunk+0x5/0x7f
[   41.709346]  kfd_mem_dmaunmap_attachment+0x9e/0x1e0 [amdgpu]
[   41.709467]  amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x56/0x80 [amdgpu]
[   41.709586]  kfd_ioctl_unmap_memory_from_gpu+0x1b7/0x300 [amdgpu]
[   41.709710]  kfd_ioctl+0x1ec/0x650 [amdgpu]
[   41.709822]  ? __pfx_kfd_ioctl_unmap_memory_from_gpu+0x10/0x10 [amdgpu]
[   41.709945]  ? srso_alias_return_thunk+0x5/0x7f
[   41.709949]  ? tomoyo_file_ioctl+0x20/0x30
[   41.709959]  __x64_sys_ioctl+0x9c/0xd0
[   41.709967]  do_syscall_64+0x3f/0x90
[   41.709973]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++++++++++++++--
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  4 ++-
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 584a0cea5572..41854417e487 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -311,7 +311,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
 					  struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
 		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_sync_memory(
 		struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
 int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6f3a4cb2a9ef..7a050d46fa4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2088,21 +2088,43 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	return ret;
 }
 
-void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
+int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
 {
 	struct kfd_mem_attachment *entry;
 	struct amdgpu_vm *vm;
+	bool reserved = false;
+	int ret = 0;
 
 	vm = drm_priv_to_vm(drm_priv);
 
 	mutex_lock(&mem->lock);
 
 	list_for_each_entry(entry, &mem->attachments, list) {
-		if (entry->bo_va->base.vm == vm)
-			kfd_mem_dmaunmap_attachment(mem, entry);
+		if (entry->bo_va->base.vm != vm)
+			continue;
+		if (entry->type == KFD_MEM_ATT_SHARED ||
+		    entry->type == KFD_MEM_ATT_DMABUF)
+			continue;
+		if (!entry->bo_va->base.bo->tbo.ttm->sg)
+			continue;
+
+		if (!reserved) {
+			ret = amdgpu_bo_reserve(mem->bo, true);
+			if (ret)
+				goto out;
+			reserved = true;
+		}
+
+		kfd_mem_dmaunmap_attachment(mem, entry);
 	}
 
+	if (reserved)
+		amdgpu_bo_unreserve(mem->bo);
+
+out:
 	mutex_unlock(&mem->lock);
+
+	return ret;
 }
 
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index ce4c52ec34d8..80e90fdef291 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 			kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
 
 		/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
-		amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+		err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
+		if (err)
+			goto sync_memory_failed;
 	}
 
 	mutex_unlock(&p->mutex);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] drm/amdkfd: reserve the BO before validating it
  2024-01-22  9:08 [PATCH v2] drm/amdkfd: reserve the BO before validating it Lang Yu
@ 2024-01-24 21:40 ` Felix Kuehling
  2024-01-26  1:59   ` Yu, Lang
  0 siblings, 1 reply; 7+ messages in thread
From: Felix Kuehling @ 2024-01-24 21:40 UTC (permalink / raw)
  To: Lang Yu, amd-gfx; +Cc: Francis David

On 2024-01-22 4:08, Lang Yu wrote:
> Fixes: 410f08516e0f ("drm/amdkfd: Move dma unmapping after TLB flush")
>
> v2:
> Avoid unmapping attachment twice when ERESTARTSYS.
>
> [   41.708711] WARNING: CPU: 0 PID: 1463 at drivers/gpu/drm/ttm/ttm_bo.c:846 ttm_bo_validate+0x146/0x1b0 [ttm]
> [   41.708989] Call Trace:
> [   41.708992]  <TASK>
> [   41.708996]  ? show_regs+0x6c/0x80
> [   41.709000]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
> [   41.709008]  ? __warn+0x93/0x190
> [   41.709014]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
> [   41.709024]  ? report_bug+0x1f9/0x210
> [   41.709035]  ? handle_bug+0x46/0x80
> [   41.709041]  ? exc_invalid_op+0x1d/0x80
> [   41.709048]  ? asm_exc_invalid_op+0x1f/0x30
> [   41.709057]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80 [amdgpu]
> [   41.709185]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
> [   41.709197]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80 [amdgpu]
> [   41.709337]  ? srso_alias_return_thunk+0x5/0x7f
> [   41.709346]  kfd_mem_dmaunmap_attachment+0x9e/0x1e0 [amdgpu]
> [   41.709467]  amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x56/0x80 [amdgpu]
> [   41.709586]  kfd_ioctl_unmap_memory_from_gpu+0x1b7/0x300 [amdgpu]
> [   41.709710]  kfd_ioctl+0x1ec/0x650 [amdgpu]
> [   41.709822]  ? __pfx_kfd_ioctl_unmap_memory_from_gpu+0x10/0x10 [amdgpu]
> [   41.709945]  ? srso_alias_return_thunk+0x5/0x7f
> [   41.709949]  ? tomoyo_file_ioctl+0x20/0x30
> [   41.709959]  __x64_sys_ioctl+0x9c/0xd0
> [   41.709967]  do_syscall_64+0x3f/0x90
> [   41.709973]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
>
> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++++++++++++++--
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  4 ++-
>   3 files changed, 29 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 584a0cea5572..41854417e487 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -311,7 +311,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
>   					  struct kgd_mem *mem, void *drm_priv);
>   int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>   		struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
>   int amdgpu_amdkfd_gpuvm_sync_memory(
>   		struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
>   int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 6f3a4cb2a9ef..7a050d46fa4d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -2088,21 +2088,43 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>   	return ret;
>   }
>   
> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
>   {
>   	struct kfd_mem_attachment *entry;
>   	struct amdgpu_vm *vm;
> +	bool reserved = false;
> +	int ret = 0;
>   
>   	vm = drm_priv_to_vm(drm_priv);
>   
>   	mutex_lock(&mem->lock);
>   
>   	list_for_each_entry(entry, &mem->attachments, list) {
> -		if (entry->bo_va->base.vm == vm)
> -			kfd_mem_dmaunmap_attachment(mem, entry);
> +		if (entry->bo_va->base.vm != vm)
> +			continue;
> +		if (entry->type == KFD_MEM_ATT_SHARED ||
> +		    entry->type == KFD_MEM_ATT_DMABUF)
> +			continue;
> +		if (!entry->bo_va->base.bo->tbo.ttm->sg)
> +			continue;

You're going to great lengths to avoid the reservation when it's not 
needed by kfd_mem_dmaunmap_attachment. However, this feels a bit 
fragile. Any changes in the kfd_mem_dmaunmap_* functions could break this.


> +
> +		if (!reserved) {
> +			ret = amdgpu_bo_reserve(mem->bo, true);
> +			if (ret)
> +				goto out;
> +			reserved = true;
> +		}
> +
> +		kfd_mem_dmaunmap_attachment(mem, entry);
>   	}
>   
> +	if (reserved)
> +		amdgpu_bo_unreserve(mem->bo);
> +
> +out:
>   	mutex_unlock(&mem->lock);
> +
> +	return ret;
>   }
>   
>   int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index ce4c52ec34d8..80e90fdef291 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>   			kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
>   
>   		/* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT */
> -		amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
> +		err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
> +		if (err)
> +			goto sync_memory_failed;

This handles the case that the system call got interrupted. But you're 
not handling the restart correctly. When the ioctl is restarted, you 
don't know how many dmaunmaps are already done. So you'd need to make 
sure that repeating the dmaunmap is safe in all cases. Or what David 
tried earlier, find a way to track the unmapping so you only try to 
dmaunmap on GPUs where it's actually dmamapped.

Regards,
   Felix


>   	}
>   
>   	mutex_unlock(&p->mutex);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2] drm/amdkfd: reserve the BO before validating it
  2024-01-24 21:40 ` Felix Kuehling
@ 2024-01-26  1:59   ` Yu, Lang
  2024-01-26 19:21     ` Felix Kuehling
  0 siblings, 1 reply; 7+ messages in thread
From: Yu, Lang @ 2024-01-26  1:59 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx; +Cc: Francis, David

[AMD Official Use Only - General]

>-----Original Message-----
>From: Kuehling, Felix <Felix.Kuehling@amd.com>
>Sent: Thursday, January 25, 2024 5:41 AM
>To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>Cc: Francis, David <David.Francis@amd.com>
>Subject: Re: [PATCH v2] drm/amdkfd: reserve the BO before validating it
>
>On 2024-01-22 4:08, Lang Yu wrote:
>> Fixes: 410f08516e0f ("drm/amdkfd: Move dma unmapping after TLB flush")
>>
>> v2:
>> Avoid unmapping attachment twice when ERESTARTSYS.
>>
>> [   41.708711] WARNING: CPU: 0 PID: 1463 at
>drivers/gpu/drm/ttm/ttm_bo.c:846 ttm_bo_validate+0x146/0x1b0 [ttm]
>> [   41.708989] Call Trace:
>> [   41.708992]  <TASK>
>> [   41.708996]  ? show_regs+0x6c/0x80
>> [   41.709000]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>> [   41.709008]  ? __warn+0x93/0x190
>> [   41.709014]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>> [   41.709024]  ? report_bug+0x1f9/0x210
>> [   41.709035]  ? handle_bug+0x46/0x80
>> [   41.709041]  ? exc_invalid_op+0x1d/0x80
>> [   41.709048]  ? asm_exc_invalid_op+0x1f/0x30
>> [   41.709057]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>[amdgpu]
>> [   41.709185]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>> [   41.709197]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>[amdgpu]
>> [   41.709337]  ? srso_alias_return_thunk+0x5/0x7f
>> [   41.709346]  kfd_mem_dmaunmap_attachment+0x9e/0x1e0 [amdgpu]
>> [   41.709467]  amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x56/0x80
>[amdgpu]
>> [   41.709586]  kfd_ioctl_unmap_memory_from_gpu+0x1b7/0x300 [amdgpu]
>> [   41.709710]  kfd_ioctl+0x1ec/0x650 [amdgpu]
>> [   41.709822]  ? __pfx_kfd_ioctl_unmap_memory_from_gpu+0x10/0x10
>[amdgpu]
>> [   41.709945]  ? srso_alias_return_thunk+0x5/0x7f
>> [   41.709949]  ? tomoyo_file_ioctl+0x20/0x30
>> [   41.709959]  __x64_sys_ioctl+0x9c/0xd0
>> [   41.709967]  do_syscall_64+0x3f/0x90
>> [   41.709973]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
>>
>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28
>+++++++++++++++++--
>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  4 ++-
>>   3 files changed, 29 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index 584a0cea5572..41854417e487 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -311,7 +311,7 @@ int
>amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
>>                                        struct kgd_mem *mem, void
>*drm_priv);
>>   int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>              struct amdgpu_device *adev, struct kgd_mem *mem, void
>*drm_priv);
>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>> *drm_priv);
>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>> +*drm_priv);
>>   int amdgpu_amdkfd_gpuvm_sync_memory(
>>              struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
>>   int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index 6f3a4cb2a9ef..7a050d46fa4d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -2088,21 +2088,43 @@ int
>amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>>      return ret;
>>   }
>>
>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>> *drm_priv)
>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>> +*drm_priv)
>>   {
>>      struct kfd_mem_attachment *entry;
>>      struct amdgpu_vm *vm;
>> +    bool reserved = false;
>> +    int ret = 0;
>>
>>      vm = drm_priv_to_vm(drm_priv);
>>
>>      mutex_lock(&mem->lock);
>>
>>      list_for_each_entry(entry, &mem->attachments, list) {
>> -            if (entry->bo_va->base.vm == vm)
>> -                    kfd_mem_dmaunmap_attachment(mem, entry);
>> +            if (entry->bo_va->base.vm != vm)
>> +                    continue;
>> +            if (entry->type == KFD_MEM_ATT_SHARED ||
>> +                entry->type == KFD_MEM_ATT_DMABUF)
>> +                    continue;
>> +            if (!entry->bo_va->base.bo->tbo.ttm->sg)
>> +                    continue;
>
>You're going to great lengths to avoid the reservation when it's not needed by
>kfd_mem_dmaunmap_attachment. However, this feels a bit fragile. Any changes
>in the kfd_mem_dmaunmap_* functions could break this.
>
>> +
>> +            if (!reserved) {
>> +                    ret = amdgpu_bo_reserve(mem->bo, true);
>> +                    if (ret)
>> +                            goto out;
>> +                    reserved = true;
>> +            }
>> +
>> +            kfd_mem_dmaunmap_attachment(mem, entry);
>>      }
>>
>> +    if (reserved)
>> +            amdgpu_bo_unreserve(mem->bo);
>> +
>> +out:
>>      mutex_unlock(&mem->lock);
>> +
>> +    return ret;
>>   }
>>
>>   int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index ce4c52ec34d8..80e90fdef291 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct
>file *filep,
>>                      kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
>>
>>              /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT
>*/
>> -            amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd-
>>drm_priv);
>> +            err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem,
>peer_pdd->drm_priv);
>> +            if (err)
>> +                    goto sync_memory_failed;
>
>This handles the case that the system call got interrupted. But you're not handling
>the restart correctly. When the ioctl is restarted, you don't know how many
>dmaunmaps are already done. So you'd need to make sure that repeating the
>dmaunmap is safe in all cases. Or what David tried earlier, find a way to track the
>unmapping so you only try to dmaunmap on GPUs where it's actually dmamapped.

From previous discussion, no one likes add another variable to track the unmappings. So I'm avoiding adding another variable.

Actually, all memory attachments use sg_table, ttm->sg is NULL? can be used as an indicator to see whether an attachment is already unmapped.
That already unmapped will not be repeated.

Regards,
Lang

>Regards,
>   Felix
>
>
>>      }
>>
>>      mutex_unlock(&p->mutex);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] drm/amdkfd: reserve the BO before validating it
  2024-01-26  1:59   ` Yu, Lang
@ 2024-01-26 19:21     ` Felix Kuehling
  2024-01-29  2:30       ` Yu, Lang
  0 siblings, 1 reply; 7+ messages in thread
From: Felix Kuehling @ 2024-01-26 19:21 UTC (permalink / raw)
  To: Yu, Lang, amd-gfx; +Cc: Francis, David


On 2024-01-25 20:59, Yu, Lang wrote:
> [AMD Official Use Only - General]
>
>> -----Original Message-----
>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>> Sent: Thursday, January 25, 2024 5:41 AM
>> To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Francis, David <David.Francis@amd.com>
>> Subject: Re: [PATCH v2] drm/amdkfd: reserve the BO before validating it
>>
>> On 2024-01-22 4:08, Lang Yu wrote:
>>> Fixes: 410f08516e0f ("drm/amdkfd: Move dma unmapping after TLB flush")
>>>
>>> v2:
>>> Avoid unmapping attachment twice when ERESTARTSYS.
>>>
>>> [   41.708711] WARNING: CPU: 0 PID: 1463 at
>> drivers/gpu/drm/ttm/ttm_bo.c:846 ttm_bo_validate+0x146/0x1b0 [ttm]
>>> [   41.708989] Call Trace:
>>> [   41.708992]  <TASK>
>>> [   41.708996]  ? show_regs+0x6c/0x80
>>> [   41.709000]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>> [   41.709008]  ? __warn+0x93/0x190
>>> [   41.709014]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>> [   41.709024]  ? report_bug+0x1f9/0x210
>>> [   41.709035]  ? handle_bug+0x46/0x80
>>> [   41.709041]  ? exc_invalid_op+0x1d/0x80
>>> [   41.709048]  ? asm_exc_invalid_op+0x1f/0x30
>>> [   41.709057]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>> [amdgpu]
>>> [   41.709185]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>> [   41.709197]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>> [amdgpu]
>>> [   41.709337]  ? srso_alias_return_thunk+0x5/0x7f
>>> [   41.709346]  kfd_mem_dmaunmap_attachment+0x9e/0x1e0 [amdgpu]
>>> [   41.709467]  amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x56/0x80
>> [amdgpu]
>>> [   41.709586]  kfd_ioctl_unmap_memory_from_gpu+0x1b7/0x300 [amdgpu]
>>> [   41.709710]  kfd_ioctl+0x1ec/0x650 [amdgpu]
>>> [   41.709822]  ? __pfx_kfd_ioctl_unmap_memory_from_gpu+0x10/0x10
>> [amdgpu]
>>> [   41.709945]  ? srso_alias_return_thunk+0x5/0x7f
>>> [   41.709949]  ? tomoyo_file_ioctl+0x20/0x30
>>> [   41.709959]  __x64_sys_ioctl+0x9c/0xd0
>>> [   41.709967]  do_syscall_64+0x3f/0x90
>>> [   41.709973]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
>>>
>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28
>> +++++++++++++++++--
>>>    drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  4 ++-
>>>    3 files changed, 29 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index 584a0cea5572..41854417e487 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -311,7 +311,7 @@ int
>> amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
>>>                                         struct kgd_mem *mem, void
>> *drm_priv);
>>>    int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>>               struct amdgpu_device *adev, struct kgd_mem *mem, void
>> *drm_priv);
>>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>> *drm_priv);
>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>> +*drm_priv);
>>>    int amdgpu_amdkfd_gpuvm_sync_memory(
>>>               struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
>>>    int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index 6f3a4cb2a9ef..7a050d46fa4d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -2088,21 +2088,43 @@ int
>> amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>>>       return ret;
>>>    }
>>>
>>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>> *drm_priv)
>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>> +*drm_priv)
>>>    {
>>>       struct kfd_mem_attachment *entry;
>>>       struct amdgpu_vm *vm;
>>> +    bool reserved = false;
>>> +    int ret = 0;
>>>
>>>       vm = drm_priv_to_vm(drm_priv);
>>>
>>>       mutex_lock(&mem->lock);
>>>
>>>       list_for_each_entry(entry, &mem->attachments, list) {
>>> -            if (entry->bo_va->base.vm == vm)
>>> -                    kfd_mem_dmaunmap_attachment(mem, entry);
>>> +            if (entry->bo_va->base.vm != vm)
>>> +                    continue;
>>> +            if (entry->type == KFD_MEM_ATT_SHARED ||
>>> +                entry->type == KFD_MEM_ATT_DMABUF)
>>> +                    continue;
>>> +            if (!entry->bo_va->base.bo->tbo.ttm->sg)
>>> +                    continue;
>> You're going to great lengths to avoid the reservation when it's not needed by
>> kfd_mem_dmaunmap_attachment. However, this feels a bit fragile. Any changes
>> in the kfd_mem_dmaunmap_* functions could break this.
>>
>>> +
>>> +            if (!reserved) {
>>> +                    ret = amdgpu_bo_reserve(mem->bo, true);
>>> +                    if (ret)
>>> +                            goto out;
>>> +                    reserved = true;
>>> +            }
>>> +
>>> +            kfd_mem_dmaunmap_attachment(mem, entry);
>>>       }
>>>
>>> +    if (reserved)
>>> +            amdgpu_bo_unreserve(mem->bo);
>>> +
>>> +out:
>>>       mutex_unlock(&mem->lock);
>>> +
>>> +    return ret;
>>>    }
>>>
>>>    int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index ce4c52ec34d8..80e90fdef291 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -1442,7 +1442,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct
>> file *filep,
>>>                       kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
>>>
>>>               /* Remove dma mapping after tlb flush to avoid IO_PAGE_FAULT
>> */
>>> -            amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd-
>>> drm_priv);
>>> +            err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem,
>> peer_pdd->drm_priv);
>>> +            if (err)
>>> +                    goto sync_memory_failed;
>> This handles the case that the system call got interrupted. But you're not handling
>> the restart correctly. When the ioctl is restarted, you don't know how many
>> dmaunmaps are already done. So you'd need to make sure that repeating the
>> dmaunmap is safe in all cases. Or what David tried earlier, find a way to track the
>> unmapping so you only try to dmaunmap on GPUs where it's actually dmamapped.
>  From previous discussion, no one likes add another variable to track the unmappings. So I'm avoiding adding another variable.
>
> Actually, all memory attachments use sg_table, ttm->sg is NULL? can be used as an indicator to see whether an attachment is already unmapped.
> That already unmapped will not be repeated.

I think that should work. I'd add the checks in kfd_mem_dmaunmap_userptr 
and kfd_mem_dmaunmap_sg_bo, where we also set ttm->sg to NULL. In fact, 
both those functions already have that check. So looks like it should 
handle the the restart correctly with your patch.

Regards,
   Felix


>
> Regards,
> Lang
>
>> Regards,
>>    Felix
>>
>>
>>>       }
>>>
>>>       mutex_unlock(&p->mutex);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2] drm/amdkfd: reserve the BO before validating it
  2024-01-26 19:21     ` Felix Kuehling
@ 2024-01-29  2:30       ` Yu, Lang
  2024-01-29 14:57         ` Felix Kuehling
  0 siblings, 1 reply; 7+ messages in thread
From: Yu, Lang @ 2024-01-29  2:30 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx; +Cc: Francis, David

[AMD Official Use Only - General]

>-----Original Message-----
>From: Kuehling, Felix <Felix.Kuehling@amd.com>
>Sent: Saturday, January 27, 2024 3:22 AM
>To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>Cc: Francis, David <David.Francis@amd.com>
>Subject: Re: [PATCH v2] drm/amdkfd: reserve the BO before validating it
>
>
>On 2024-01-25 20:59, Yu, Lang wrote:
>> [AMD Official Use Only - General]
>>
>>> -----Original Message-----
>>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>>> Sent: Thursday, January 25, 2024 5:41 AM
>>> To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>>> Cc: Francis, David <David.Francis@amd.com>
>>> Subject: Re: [PATCH v2] drm/amdkfd: reserve the BO before validating
>>> it
>>>
>>> On 2024-01-22 4:08, Lang Yu wrote:
>>>> Fixes: 410f08516e0f ("drm/amdkfd: Move dma unmapping after TLB
>>>> flush")
>>>>
>>>> v2:
>>>> Avoid unmapping attachment twice when ERESTARTSYS.
>>>>
>>>> [   41.708711] WARNING: CPU: 0 PID: 1463 at
>>> drivers/gpu/drm/ttm/ttm_bo.c:846 ttm_bo_validate+0x146/0x1b0 [ttm]
>>>> [   41.708989] Call Trace:
>>>> [   41.708992]  <TASK>
>>>> [   41.708996]  ? show_regs+0x6c/0x80
>>>> [   41.709000]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>>> [   41.709008]  ? __warn+0x93/0x190
>>>> [   41.709014]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>>> [   41.709024]  ? report_bug+0x1f9/0x210
>>>> [   41.709035]  ? handle_bug+0x46/0x80
>>>> [   41.709041]  ? exc_invalid_op+0x1d/0x80
>>>> [   41.709048]  ? asm_exc_invalid_op+0x1f/0x30
>>>> [   41.709057]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>>> [amdgpu]
>>>> [   41.709185]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>>> [   41.709197]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>>> [amdgpu]
>>>> [   41.709337]  ? srso_alias_return_thunk+0x5/0x7f
>>>> [   41.709346]  kfd_mem_dmaunmap_attachment+0x9e/0x1e0 [amdgpu]
>>>> [   41.709467]  amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x56/0x80
>>> [amdgpu]
>>>> [   41.709586]  kfd_ioctl_unmap_memory_from_gpu+0x1b7/0x300 [amdgpu]
>>>> [   41.709710]  kfd_ioctl+0x1ec/0x650 [amdgpu]
>>>> [   41.709822]  ? __pfx_kfd_ioctl_unmap_memory_from_gpu+0x10/0x10
>>> [amdgpu]
>>>> [   41.709945]  ? srso_alias_return_thunk+0x5/0x7f
>>>> [   41.709949]  ? tomoyo_file_ioctl+0x20/0x30
>>>> [   41.709959]  __x64_sys_ioctl+0x9c/0xd0
>>>> [   41.709967]  do_syscall_64+0x3f/0x90
>>>> [   41.709973]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
>>>>
>>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
>>>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28
>>> +++++++++++++++++--
>>>>    drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  4 ++-
>>>>    3 files changed, 29 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> index 584a0cea5572..41854417e487 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>> @@ -311,7 +311,7 @@ int
>>> amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
>>>>                                         struct kgd_mem *mem, void
>>> *drm_priv);
>>>>    int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>>>               struct amdgpu_device *adev, struct kgd_mem *mem, void
>>> *drm_priv);
>>>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>>> *drm_priv);
>>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>>> +*drm_priv);
>>>>    int amdgpu_amdkfd_gpuvm_sync_memory(
>>>>               struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
>>>>    int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> index 6f3a4cb2a9ef..7a050d46fa4d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>> @@ -2088,21 +2088,43 @@ int
>>> amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>>>>       return ret;
>>>>    }
>>>>
>>>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>>> *drm_priv)
>>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>>> +*drm_priv)
>>>>    {
>>>>       struct kfd_mem_attachment *entry;
>>>>       struct amdgpu_vm *vm;
>>>> +    bool reserved = false;
>>>> +    int ret = 0;
>>>>
>>>>       vm = drm_priv_to_vm(drm_priv);
>>>>
>>>>       mutex_lock(&mem->lock);
>>>>
>>>>       list_for_each_entry(entry, &mem->attachments, list) {
>>>> -            if (entry->bo_va->base.vm == vm)
>>>> -                    kfd_mem_dmaunmap_attachment(mem, entry);
>>>> +            if (entry->bo_va->base.vm != vm)
>>>> +                    continue;
>>>> +            if (entry->type == KFD_MEM_ATT_SHARED ||
>>>> +                entry->type == KFD_MEM_ATT_DMABUF)
>>>> +                    continue;
>>>> +            if (!entry->bo_va->base.bo->tbo.ttm->sg)
>>>> +                    continue;
>>> You're going to great lengths to avoid the reservation when it's not
>>> needed by kfd_mem_dmaunmap_attachment. However, this feels a bit
>>> fragile. Any changes in the kfd_mem_dmaunmap_* functions could break this.
>>>
>>>> +
>>>> +            if (!reserved) {
>>>> +                    ret = amdgpu_bo_reserve(mem->bo, true);
>>>> +                    if (ret)
>>>> +                            goto out;
>>>> +                    reserved = true;
>>>> +            }
>>>> +
>>>> +            kfd_mem_dmaunmap_attachment(mem, entry);
>>>>       }
>>>>
>>>> +    if (reserved)
>>>> +            amdgpu_bo_unreserve(mem->bo);
>>>> +
>>>> +out:
>>>>       mutex_unlock(&mem->lock);
>>>> +
>>>> +    return ret;
>>>>    }
>>>>
>>>>    int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>> index ce4c52ec34d8..80e90fdef291 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>> @@ -1442,7 +1442,9 @@ static int
>>>> kfd_ioctl_unmap_memory_from_gpu(struct
>>> file *filep,
>>>>                       kfd_flush_tlb(peer_pdd,
>>>> TLB_FLUSH_HEAVYWEIGHT);
>>>>
>>>>               /* Remove dma mapping after tlb flush to avoid
>>>> IO_PAGE_FAULT
>>> */
>>>> -            amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd-
>>>> drm_priv);
>>>> +            err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem,
>>> peer_pdd->drm_priv);
>>>> +            if (err)
>>>> +                    goto sync_memory_failed;
>>> This handles the case that the system call got interrupted. But
>>> you're not handling the restart correctly. When the ioctl is
>>> restarted, you don't know how many dmaunmaps are already done. So
>>> you'd need to make sure that repeating the dmaunmap is safe in all
>>> cases. Or what David tried earlier, find a way to track the unmapping so you
>only try to dmaunmap on GPUs where it's actually dmamapped.
>>  From previous discussion, no one likes add another variable to track the
>unmappings. So I'm avoiding adding another variable.
>>
>> Actually, all memory attachments use sg_table, ttm->sg is NULL? can be used as
>an indicator to see whether an attachment is already unmapped.
>> That already unmapped will not be repeated.
>
>I think that should work. I'd add the checks in kfd_mem_dmaunmap_userptr and
>kfd_mem_dmaunmap_sg_bo, where we also set ttm->sg to NULL. In fact, both
>those functions already have that check. So looks like it should handle the the
>restart correctly with your patch.

Yes, both kfd_mem_dmaunmap_userptr() and kfd_mem_dmaunmap_sg_bo() have NULL check for ttm->sg.

And dmabuf also have this check in amdgpu_ttm_backend_unbind(). So dmaunmap won't be repeated actually.

Then the benefits of handling ERESTARTSYS is avoiding amdgpu_bo_reserve().

What do you think? It's worth avoiding reservation in this case?

Regards,
Lang

>Regards,
>   Felix
>
>
>>
>> Regards,
>> Lang
>>
>>> Regards,
>>>    Felix
>>>
>>>
>>>>       }
>>>>
>>>>       mutex_unlock(&p->mutex);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] drm/amdkfd: reserve the BO before validating it
  2024-01-29  2:30       ` Yu, Lang
@ 2024-01-29 14:57         ` Felix Kuehling
  2024-01-30  2:23           ` Yu, Lang
  0 siblings, 1 reply; 7+ messages in thread
From: Felix Kuehling @ 2024-01-29 14:57 UTC (permalink / raw)
  To: Yu, Lang, amd-gfx; +Cc: Francis, David

On 2024-01-28 21:30, Yu, Lang wrote:
> [AMD Official Use Only - General]
>
>> -----Original Message-----
>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>> Sent: Saturday, January 27, 2024 3:22 AM
>> To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Francis, David <David.Francis@amd.com>
>> Subject: Re: [PATCH v2] drm/amdkfd: reserve the BO before validating it
>>
>>
>> On 2024-01-25 20:59, Yu, Lang wrote:
>>> [AMD Official Use Only - General]
>>>
>>>> -----Original Message-----
>>>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>>>> Sent: Thursday, January 25, 2024 5:41 AM
>>>> To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>>>> Cc: Francis, David <David.Francis@amd.com>
>>>> Subject: Re: [PATCH v2] drm/amdkfd: reserve the BO before validating
>>>> it
>>>>
>>>> On 2024-01-22 4:08, Lang Yu wrote:
>>>>> Fixes: 410f08516e0f ("drm/amdkfd: Move dma unmapping after TLB
>>>>> flush")
>>>>>
>>>>> v2:
>>>>> Avoid unmapping attachment twice when ERESTARTSYS.
>>>>>
>>>>> [   41.708711] WARNING: CPU: 0 PID: 1463 at
>>>> drivers/gpu/drm/ttm/ttm_bo.c:846 ttm_bo_validate+0x146/0x1b0 [ttm]
>>>>> [   41.708989] Call Trace:
>>>>> [   41.708992]  <TASK>
>>>>> [   41.708996]  ? show_regs+0x6c/0x80
>>>>> [   41.709000]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>>>> [   41.709008]  ? __warn+0x93/0x190
>>>>> [   41.709014]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>>>> [   41.709024]  ? report_bug+0x1f9/0x210
>>>>> [   41.709035]  ? handle_bug+0x46/0x80
>>>>> [   41.709041]  ? exc_invalid_op+0x1d/0x80
>>>>> [   41.709048]  ? asm_exc_invalid_op+0x1f/0x30
>>>>> [   41.709057]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>>>> [amdgpu]
>>>>> [   41.709185]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>>>> [   41.709197]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>>>> [amdgpu]
>>>>> [   41.709337]  ? srso_alias_return_thunk+0x5/0x7f
>>>>> [   41.709346]  kfd_mem_dmaunmap_attachment+0x9e/0x1e0 [amdgpu]
>>>>> [   41.709467]  amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x56/0x80
>>>> [amdgpu]
>>>>> [   41.709586]  kfd_ioctl_unmap_memory_from_gpu+0x1b7/0x300 [amdgpu]
>>>>> [   41.709710]  kfd_ioctl+0x1ec/0x650 [amdgpu]
>>>>> [   41.709822]  ? __pfx_kfd_ioctl_unmap_memory_from_gpu+0x10/0x10
>>>> [amdgpu]
>>>>> [   41.709945]  ? srso_alias_return_thunk+0x5/0x7f
>>>>> [   41.709949]  ? tomoyo_file_ioctl+0x20/0x30
>>>>> [   41.709959]  __x64_sys_ioctl+0x9c/0xd0
>>>>> [   41.709967]  do_syscall_64+0x3f/0x90
>>>>> [   41.709973]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
>>>>>
>>>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>>>> ---
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
>>>>>     .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28
>>>> +++++++++++++++++--
>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  4 ++-
>>>>>     3 files changed, 29 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>> index 584a0cea5572..41854417e487 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>> @@ -311,7 +311,7 @@ int
>>>> amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
>>>>>                                          struct kgd_mem *mem, void
>>>> *drm_priv);
>>>>>     int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>>>>                struct amdgpu_device *adev, struct kgd_mem *mem, void
>>>> *drm_priv);
>>>>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>>>> *drm_priv);
>>>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>>>> +*drm_priv);
>>>>>     int amdgpu_amdkfd_gpuvm_sync_memory(
>>>>>                struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
>>>>>     int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> index 6f3a4cb2a9ef..7a050d46fa4d 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>> @@ -2088,21 +2088,43 @@ int
>>>> amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>>>>>        return ret;
>>>>>     }
>>>>>
>>>>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>>>> *drm_priv)
>>>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void
>>>>> +*drm_priv)
>>>>>     {
>>>>>        struct kfd_mem_attachment *entry;
>>>>>        struct amdgpu_vm *vm;
>>>>> +    bool reserved = false;
>>>>> +    int ret = 0;
>>>>>
>>>>>        vm = drm_priv_to_vm(drm_priv);
>>>>>
>>>>>        mutex_lock(&mem->lock);
>>>>>
>>>>>        list_for_each_entry(entry, &mem->attachments, list) {
>>>>> -            if (entry->bo_va->base.vm == vm)
>>>>> -                    kfd_mem_dmaunmap_attachment(mem, entry);
>>>>> +            if (entry->bo_va->base.vm != vm)
>>>>> +                    continue;
>>>>> +            if (entry->type == KFD_MEM_ATT_SHARED ||
>>>>> +                entry->type == KFD_MEM_ATT_DMABUF)
>>>>> +                    continue;
>>>>> +            if (!entry->bo_va->base.bo->tbo.ttm->sg)
>>>>> +                    continue;
>>>> You're going to great lengths to avoid the reservation when it's not
>>>> needed by kfd_mem_dmaunmap_attachment. However, this feels a bit
>>>> fragile. Any changes in the kfd_mem_dmaunmap_* functions could break this.
>>>>
>>>>> +
>>>>> +            if (!reserved) {
>>>>> +                    ret = amdgpu_bo_reserve(mem->bo, true);
>>>>> +                    if (ret)
>>>>> +                            goto out;
>>>>> +                    reserved = true;
>>>>> +            }
>>>>> +
>>>>> +            kfd_mem_dmaunmap_attachment(mem, entry);
>>>>>        }
>>>>>
>>>>> +    if (reserved)
>>>>> +            amdgpu_bo_unreserve(mem->bo);
>>>>> +
>>>>> +out:
>>>>>        mutex_unlock(&mem->lock);
>>>>> +
>>>>> +    return ret;
>>>>>     }
>>>>>
>>>>>     int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>>> index ce4c52ec34d8..80e90fdef291 100644
>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>>> @@ -1442,7 +1442,9 @@ static int
>>>>> kfd_ioctl_unmap_memory_from_gpu(struct
>>>> file *filep,
>>>>>                        kfd_flush_tlb(peer_pdd,
>>>>> TLB_FLUSH_HEAVYWEIGHT);
>>>>>
>>>>>                /* Remove dma mapping after tlb flush to avoid
>>>>> IO_PAGE_FAULT
>>>> */
>>>>> -            amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd-
>>>>> drm_priv);
>>>>> +            err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem,
>>>> peer_pdd->drm_priv);
>>>>> +            if (err)
>>>>> +                    goto sync_memory_failed;
>>>> This handles the case that the system call got interrupted. But
>>>> you're not handling the restart correctly. When the ioctl is
>>>> restarted, you don't know how many dmaunmaps are already done. So
>>>> you'd need to make sure that repeating the dmaunmap is safe in all
>>>> cases. Or what David tried earlier, find a way to track the unmapping so you
>> only try to dmaunmap on GPUs where it's actually dmamapped.
>>>   From previous discussion, no one likes add another variable to track the
>> unmappings. So I'm avoiding adding another variable.
>>> Actually, all memory attachments use sg_table, ttm->sg is NULL? can be used as
>> an indicator to see whether an attachment is already unmapped.
>>> That already unmapped will not be repeated.
>> I think that should work. I'd add the checks in kfd_mem_dmaunmap_userptr and
>> kfd_mem_dmaunmap_sg_bo, where we also set ttm->sg to NULL. In fact, both
>> those functions already have that check. So looks like it should handle the the
>> restart correctly with your patch.
> Yes, both kfd_mem_dmaunmap_userptr() and kfd_mem_dmaunmap_sg_bo() have NULL check for ttm->sg.
>
> And dmabuf also have this check in amdgpu_ttm_backend_unbind(). So dmaunmap won't be repeated actually.
>
> Then the benefits of handling ERESTARTSYS is avoiding amdgpu_bo_reserve().
>
> What do you think? It's worth avoiding reservation in this case?

I don't think it's worth the trouble. In fact, to avoid race conditions, 
you probably should take the reservation anyway before looking at ttm->sg.

Regards,
   Felix


>
> Regards,
> Lang
>
>> Regards,
>>    Felix
>>
>>
>>> Regards,
>>> Lang
>>>
>>>> Regards,
>>>>     Felix
>>>>
>>>>
>>>>>        }
>>>>>
>>>>>        mutex_unlock(&p->mutex);

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2] drm/amdkfd: reserve the BO before validating it
  2024-01-29 14:57         ` Felix Kuehling
@ 2024-01-30  2:23           ` Yu, Lang
  0 siblings, 0 replies; 7+ messages in thread
From: Yu, Lang @ 2024-01-30  2:23 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx; +Cc: Francis, David

[Public]

>-----Original Message-----
>From: Kuehling, Felix <Felix.Kuehling@amd.com>
>Sent: Monday, January 29, 2024 10:58 PM
>To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>Cc: Francis, David <David.Francis@amd.com>
>Subject: Re: [PATCH v2] drm/amdkfd: reserve the BO before validating it
>
>On 2024-01-28 21:30, Yu, Lang wrote:
>> [AMD Official Use Only - General]
>>
>>> -----Original Message-----
>>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>>> Sent: Saturday, January 27, 2024 3:22 AM
>>> To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>>> Cc: Francis, David <David.Francis@amd.com>
>>> Subject: Re: [PATCH v2] drm/amdkfd: reserve the BO before validating
>>> it
>>>
>>>
>>> On 2024-01-25 20:59, Yu, Lang wrote:
>>>> [AMD Official Use Only - General]
>>>>
>>>>> -----Original Message-----
>>>>> From: Kuehling, Felix <Felix.Kuehling@amd.com>
>>>>> Sent: Thursday, January 25, 2024 5:41 AM
>>>>> To: Yu, Lang <Lang.Yu@amd.com>; amd-gfx@lists.freedesktop.org
>>>>> Cc: Francis, David <David.Francis@amd.com>
>>>>> Subject: Re: [PATCH v2] drm/amdkfd: reserve the BO before
>>>>> validating it
>>>>>
>>>>> On 2024-01-22 4:08, Lang Yu wrote:
>>>>>> Fixes: 410f08516e0f ("drm/amdkfd: Move dma unmapping after TLB
>>>>>> flush")
>>>>>>
>>>>>> v2:
>>>>>> Avoid unmapping attachment twice when ERESTARTSYS.
>>>>>>
>>>>>> [   41.708711] WARNING: CPU: 0 PID: 1463 at
>>>>> drivers/gpu/drm/ttm/ttm_bo.c:846 ttm_bo_validate+0x146/0x1b0 [ttm]
>>>>>> [   41.708989] Call Trace:
>>>>>> [   41.708992]  <TASK>
>>>>>> [   41.708996]  ? show_regs+0x6c/0x80
>>>>>> [   41.709000]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>>>>> [   41.709008]  ? __warn+0x93/0x190
>>>>>> [   41.709014]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>>>>> [   41.709024]  ? report_bug+0x1f9/0x210
>>>>>> [   41.709035]  ? handle_bug+0x46/0x80
>>>>>> [   41.709041]  ? exc_invalid_op+0x1d/0x80
>>>>>> [   41.709048]  ? asm_exc_invalid_op+0x1f/0x30
>>>>>> [   41.709057]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>>>>> [amdgpu]
>>>>>> [   41.709185]  ? ttm_bo_validate+0x146/0x1b0 [ttm]
>>>>>> [   41.709197]  ? amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x2c/0x80
>>>>> [amdgpu]
>>>>>> [   41.709337]  ? srso_alias_return_thunk+0x5/0x7f
>>>>>> [   41.709346]  kfd_mem_dmaunmap_attachment+0x9e/0x1e0 [amdgpu]
>>>>>> [   41.709467]  amdgpu_amdkfd_gpuvm_dmaunmap_mem+0x56/0x80
>>>>> [amdgpu]
>>>>>> [   41.709586]  kfd_ioctl_unmap_memory_from_gpu+0x1b7/0x300
>[amdgpu]
>>>>>> [   41.709710]  kfd_ioctl+0x1ec/0x650 [amdgpu]
>>>>>> [   41.709822]  ? __pfx_kfd_ioctl_unmap_memory_from_gpu+0x10/0x10
>>>>> [amdgpu]
>>>>>> [   41.709945]  ? srso_alias_return_thunk+0x5/0x7f
>>>>>> [   41.709949]  ? tomoyo_file_ioctl+0x20/0x30
>>>>>> [   41.709959]  __x64_sys_ioctl+0x9c/0xd0
>>>>>> [   41.709967]  do_syscall_64+0x3f/0x90
>>>>>> [   41.709973]  entry_SYSCALL_64_after_hwframe+0x6e/0xd8
>>>>>>
>>>>>> Signed-off-by: Lang Yu <Lang.Yu@amd.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 +-
>>>>>>     .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28
>>>>> +++++++++++++++++--
>>>>>>     drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  4 ++-
>>>>>>     3 files changed, 29 insertions(+), 5 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>>> index 584a0cea5572..41854417e487 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>>>>> @@ -311,7 +311,7 @@ int
>>>>> amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device
>*adev,
>>>>>>                                          struct kgd_mem *mem, void
>>>>> *drm_priv);
>>>>>>     int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>>>>>                struct amdgpu_device *adev, struct kgd_mem *mem,
>>>>>> void
>>>>> *drm_priv);
>>>>>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem,
>void
>>>>>> *drm_priv);
>>>>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem,
>void
>>>>>> +*drm_priv);
>>>>>>     int amdgpu_amdkfd_gpuvm_sync_memory(
>>>>>>                struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
>>>>>>     int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem
>>>>>> *mem, diff --git
>>>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> index 6f3a4cb2a9ef..7a050d46fa4d 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>>>>> @@ -2088,21 +2088,43 @@ int
>>>>> amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>>>>>>        return ret;
>>>>>>     }
>>>>>>
>>>>>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem,
>void
>>>>>> *drm_priv)
>>>>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem,
>void
>>>>>> +*drm_priv)
>>>>>>     {
>>>>>>        struct kfd_mem_attachment *entry;
>>>>>>        struct amdgpu_vm *vm;
>>>>>> +    bool reserved = false;
>>>>>> +    int ret = 0;
>>>>>>
>>>>>>        vm = drm_priv_to_vm(drm_priv);
>>>>>>
>>>>>>        mutex_lock(&mem->lock);
>>>>>>
>>>>>>        list_for_each_entry(entry, &mem->attachments, list) {
>>>>>> -            if (entry->bo_va->base.vm == vm)
>>>>>> -                    kfd_mem_dmaunmap_attachment(mem, entry);
>>>>>> +            if (entry->bo_va->base.vm != vm)
>>>>>> +                    continue;
>>>>>> +            if (entry->type == KFD_MEM_ATT_SHARED ||
>>>>>> +                entry->type == KFD_MEM_ATT_DMABUF)
>>>>>> +                    continue;
>>>>>> +            if (!entry->bo_va->base.bo->tbo.ttm->sg)
>>>>>> +                    continue;
>>>>> You're going to great lengths to avoid the reservation when it's
>>>>> not needed by kfd_mem_dmaunmap_attachment. However, this feels a
>>>>> bit fragile. Any changes in the kfd_mem_dmaunmap_* functions could break
>this.
>>>>>
>>>>>> +
>>>>>> +            if (!reserved) {
>>>>>> +                    ret = amdgpu_bo_reserve(mem->bo, true);
>>>>>> +                    if (ret)
>>>>>> +                            goto out;
>>>>>> +                    reserved = true;
>>>>>> +            }
>>>>>> +
>>>>>> +            kfd_mem_dmaunmap_attachment(mem, entry);
>>>>>>        }
>>>>>>
>>>>>> +    if (reserved)
>>>>>> +            amdgpu_bo_unreserve(mem->bo);
>>>>>> +
>>>>>> +out:
>>>>>>        mutex_unlock(&mem->lock);
>>>>>> +
>>>>>> +    return ret;
>>>>>>     }
>>>>>>
>>>>>>     int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>>>> index ce4c52ec34d8..80e90fdef291 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>>>>> @@ -1442,7 +1442,9 @@ static int
>>>>>> kfd_ioctl_unmap_memory_from_gpu(struct
>>>>> file *filep,
>>>>>>                        kfd_flush_tlb(peer_pdd,
>>>>>> TLB_FLUSH_HEAVYWEIGHT);
>>>>>>
>>>>>>                /* Remove dma mapping after tlb flush to avoid
>>>>>> IO_PAGE_FAULT
>>>>> */
>>>>>> -            amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd-
>>>>>> drm_priv);
>>>>>> +            err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem,
>>>>> peer_pdd->drm_priv);
>>>>>> +            if (err)
>>>>>> +                    goto sync_memory_failed;
>>>>> This handles the case that the system call got interrupted. But
>>>>> you're not handling the restart correctly. When the ioctl is
>>>>> restarted, you don't know how many dmaunmaps are already done. So
>>>>> you'd need to make sure that repeating the dmaunmap is safe in all
>>>>> cases. Or what David tried earlier, find a way to track the
>>>>> unmapping so you
>>> only try to dmaunmap on GPUs where it's actually dmamapped.
>>>>   From previous discussion, no one likes add another variable to
>>>> track the
>>> unmappings. So I'm avoiding adding another variable.
>>>> Actually, all memory attachments use sg_table, ttm->sg is NULL? can
>>>> be used as
>>> an indicator to see whether an attachment is already unmapped.
>>>> That already unmapped will not be repeated.
>>> I think that should work. I'd add the checks in
>>> kfd_mem_dmaunmap_userptr and kfd_mem_dmaunmap_sg_bo, where we
>also
>>> set ttm->sg to NULL. In fact, both those functions already have that
>>> check. So looks like it should handle the the restart correctly with your patch.
>> Yes, both kfd_mem_dmaunmap_userptr() and kfd_mem_dmaunmap_sg_bo()
>have NULL check for ttm->sg.
>>
>> And dmabuf also have this check in amdgpu_ttm_backend_unbind(). So
>dmaunmap won't be repeated actually.
>>
>> Then the benefits of handling ERESTARTSYS is avoiding amdgpu_bo_reserve().
>>
>> What do you think? It's worth avoiding reservation in this case?
>
>I don't think it's worth the trouble. In fact, to avoid race conditions, you probably
>should take the reservation anyway before looking at ttm->sg.

Got it. Thanks.

Regards,
Lang

>Regards,
>   Felix
>
>
>>
>> Regards,
>> Lang
>>
>>> Regards,
>>>    Felix
>>>
>>>
>>>> Regards,
>>>> Lang
>>>>
>>>>> Regards,
>>>>>     Felix
>>>>>
>>>>>
>>>>>>        }
>>>>>>
>>>>>>        mutex_unlock(&p->mutex);

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-01-30  2:23 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-22  9:08 [PATCH v2] drm/amdkfd: reserve the BO before validating it Lang Yu
2024-01-24 21:40 ` Felix Kuehling
2024-01-26  1:59   ` Yu, Lang
2024-01-26 19:21     ` Felix Kuehling
2024-01-29  2:30       ` Yu, Lang
2024-01-29 14:57         ` Felix Kuehling
2024-01-30  2:23           ` Yu, Lang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.