amd-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: Fix one use-after-free of VM
@ 2022-04-12 12:03 xinhui pan
  2022-04-12 12:11 ` Christian König
  0 siblings, 1 reply; 5+ messages in thread
From: xinhui pan @ 2022-04-12 12:03 UTC (permalink / raw)
  To: amd-gfx; +Cc: alexander.deucher, xinhui pan, christian.koenig

VM might already be freed when amdgpu_vm_tlb_seq_cb() is called.
We see the calltrace below.

Fix it by keeping the last flush fence around and waiting for it to signal.

BUG kmalloc-4k (Not tainted): Poison overwritten

0xffff9c88630414e8-0xffff9c88630414e8 @offset=5352. First byte 0x6c
instead of 0x6b Allocated in amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
age=44 cpu=0 pid=2343
 __slab_alloc.isra.0+0x4f/0x90
 kmem_cache_alloc_trace+0x6b8/0x7a0
 amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
 drm_file_alloc+0x222/0x3e0 [drm]
 drm_open+0x11d/0x410 [drm]
 drm_stub_open+0xdc/0x230 [drm]
 chrdev_open+0xa5/0x1e0
 do_dentry_open+0x16c/0x3c0
 vfs_open+0x2d/0x30
 path_openat+0x70a/0xa90
 do_filp_open+0xb2/0x120
 do_sys_openat2+0x245/0x330
 do_sys_open+0x46/0x80
 __x64_sys_openat+0x20/0x30
 do_syscall_64+0x38/0xc0
 entry_SYSCALL_64_after_hwframe+0x44/0xae
Freed in amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu] age=22 cpu=1
pid=2485
 kfree+0x4a2/0x580
 amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu]
 drm_file_free+0x24e/0x3c0 [drm]
 drm_close_helper.isra.0+0x90/0xb0 [drm]
 drm_release+0x97/0x1a0 [drm]
 __fput+0xb6/0x280
 ____fput+0xe/0x10
 task_work_run+0x64/0xb0
 do_exit+0x406/0xcf0
 do_group_exit+0x50/0xc0
 __x64_sys_exit_group+0x18/0x20
 do_syscall_64+0x38/0xc0
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 645ce28277c2..e2486e95ca69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -932,9 +932,12 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	if (flush_tlb || params.table_freed) {
 		tlb_cb->vm = vm;
-		if (!fence || !*fence ||
-		    dma_fence_add_callback(*fence, &tlb_cb->cb,
-					   amdgpu_vm_tlb_seq_cb))
+		if (fence && *fence &&
+		    !dma_fence_add_callback(*fence, &tlb_cb->cb,
+					   amdgpu_vm_tlb_seq_cb)) {
+			dma_fence_put(vm->last_delayed_tlb_flush);
+			vm->last_delayed_tlb_flush = dma_fence_get(*fence);
+		} else
 			amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
 		tlb_cb = NULL;
 	}
@@ -2258,6 +2261,19 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	dma_fence_wait(vm->last_unlocked, false);
 	dma_fence_put(vm->last_unlocked);
 
+	if (vm->last_delayed_tlb_flush) {
+		/* Wait until fence is signaled.
+		 * But must double check to make sure fence cb is called.
+		 * As dma_fence_default_wait checks DMA_FENCE_FLAG_SIGNALED_BIT without
+		 * holding fence lock(the first test_bit).
+		 * So call dma_fence_get_status which will hold the fence lock.
+		 * Then we can make sure fence cb has been called.
+		 */
+		(void)dma_fence_wait(vm->last_delayed_tlb_flush, false);
+		(void)dma_fence_get_status(vm->last_delayed_tlb_flush);
+		dma_fence_put(vm->last_delayed_tlb_flush);
+	}
+
 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
 		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
 			amdgpu_vm_prt_fini(adev, vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 1a814fbffff8..c1a48f5c1019 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -286,6 +286,7 @@ struct amdgpu_vm {
 
 	/* Last finished delayed update */
 	atomic64_t		tlb_seq;
+	struct dma_fence	*last_delayed_tlb_flush;
 
 	/* Last unlocked submission to the scheduler entities */
 	struct dma_fence	*last_unlocked;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu: Fix one use-after-free of VM
  2022-04-12 12:03 [PATCH] drm/amdgpu: Fix one use-after-free of VM xinhui pan
@ 2022-04-12 12:11 ` Christian König
  2022-04-13  3:07   ` 回复: " Pan, Xinhui
  2022-04-13 11:15   ` Daniel Vetter
  0 siblings, 2 replies; 5+ messages in thread
From: Christian König @ 2022-04-12 12:11 UTC (permalink / raw)
  To: xinhui pan, amd-gfx, Daniel Vetter; +Cc: alexander.deucher

Am 12.04.22 um 14:03 schrieb xinhui pan:
> VM might already be freed when amdgpu_vm_tlb_seq_cb() is called.
> We see the calltrace below.
>
> Fix it by keeping the last flush fence around and wait for it to signal
>
> BUG kmalloc-4k (Not tainted): Poison overwritten
>
> 0xffff9c88630414e8-0xffff9c88630414e8 @offset=5352. First byte 0x6c
> instead of 0x6b Allocated in amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
> age=44 cpu=0 pid=2343
>   __slab_alloc.isra.0+0x4f/0x90
>   kmem_cache_alloc_trace+0x6b8/0x7a0
>   amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
>   drm_file_alloc+0x222/0x3e0 [drm]
>   drm_open+0x11d/0x410 [drm]
>   drm_stub_open+0xdc/0x230 [drm]
>   chrdev_open+0xa5/0x1e0
>   do_dentry_open+0x16c/0x3c0
>   vfs_open+0x2d/0x30
>   path_openat+0x70a/0xa90
>   do_filp_open+0xb2/0x120
>   do_sys_openat2+0x245/0x330
>   do_sys_open+0x46/0x80
>   __x64_sys_openat+0x20/0x30
>   do_syscall_64+0x38/0xc0
>   entry_SYSCALL_64_after_hwframe+0x44/0xae
> Freed in amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu] age=22 cpu=1
> pid=2485
>   kfree+0x4a2/0x580
>   amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu]
>   drm_file_free+0x24e/0x3c0 [drm]
>   drm_close_helper.isra.0+0x90/0xb0 [drm]
>   drm_release+0x97/0x1a0 [drm]
>   __fput+0xb6/0x280
>   ____fput+0xe/0x10
>   task_work_run+0x64/0xb0
>   do_exit+0x406/0xcf0
>   do_group_exit+0x50/0xc0
>   __x64_sys_exit_group+0x18/0x20
>   do_syscall_64+0x38/0xc0
>   entry_SYSCALL_64_after_hwframe+0x44/0xae
>
> Suggested-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
>   2 files changed, 20 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 645ce28277c2..e2486e95ca69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -932,9 +932,12 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   
>   	if (flush_tlb || params.table_freed) {
>   		tlb_cb->vm = vm;
> -		if (!fence || !*fence ||
> -		    dma_fence_add_callback(*fence, &tlb_cb->cb,
> -					   amdgpu_vm_tlb_seq_cb))
> +		if (fence && *fence &&
> +		    !dma_fence_add_callback(*fence, &tlb_cb->cb,
> +					   amdgpu_vm_tlb_seq_cb)) {
> +			dma_fence_put(vm->last_delayed_tlb_flush);
> +			vm->last_delayed_tlb_flush = dma_fence_get(*fence);
> +		} else
>   			amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
>   		tlb_cb = NULL;
>   	}
> @@ -2258,6 +2261,19 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   	dma_fence_wait(vm->last_unlocked, false);
>   	dma_fence_put(vm->last_unlocked);
>   
> +	if (vm->last_delayed_tlb_flush) {

You can initialize last_delayed_tlb_flush with the dummy fence; see 
how last_unlocked works.

> +		/* Wait until fence is signaled.
> +		 * But must double check to make sure fence cb is called.
> +		 * As dma_fence_default_wait checks DMA_FENCE_FLAG_SIGNALED_BIT without
> +		 * holding fence lock(the first test_bit).
> +		 * So call dma_fence_get_status which will hold the fence lock.
> +		 * Then we can make sure fence cb has been called.
> +		 */

Uff, that is a really good point and most likely a bug in dma_fence_wait().

I'm pretty sure that a couple of other callers rely on that as well.

Daniel, what's your opinion about that?

Thanks,
Christian.

> +		(void)dma_fence_wait(vm->last_delayed_tlb_flush, false);
> +		(void)dma_fence_get_status(vm->last_delayed_tlb_flush);
> +		dma_fence_put(vm->last_delayed_tlb_flush);
> +	}
> +
>   	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>   		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>   			amdgpu_vm_prt_fini(adev, vm);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 1a814fbffff8..c1a48f5c1019 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -286,6 +286,7 @@ struct amdgpu_vm {
>   
>   	/* Last finished delayed update */
>   	atomic64_t		tlb_seq;
> +	struct dma_fence	*last_delayed_tlb_flush;
>   
>   	/* Last unlocked submission to the scheduler entities */
>   	struct dma_fence	*last_unlocked;


^ permalink raw reply	[flat|nested] 5+ messages in thread

* 回复: [PATCH] drm/amdgpu: Fix one use-after-free of VM
  2022-04-12 12:11 ` Christian König
@ 2022-04-13  3:07   ` Pan, Xinhui
  2022-04-13  8:34     ` Christian König
  2022-04-13 11:15   ` Daniel Vetter
  1 sibling, 1 reply; 5+ messages in thread
From: Pan, Xinhui @ 2022-04-13  3:07 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx, Daniel Vetter; +Cc: Deucher, Alexander

[AMD Official Use Only]

We could do something similar to what dma_fence_release does.

@@ -783,11 +783,15 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
        unsigned long flags;
        signed long ret = timeout ? timeout : 1;

-       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
+           list_empty(&fence->cb_list))
                return ret;

        spin_lock_irqsave(fence->lock, flags);

+       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+               goto out;
+
        if (intr && signal_pending(current)) {
                ret = -ERESTARTSYS;
                goto out;
________________________________________
发件人: Koenig, Christian <Christian.Koenig@amd.com>
发送时间: 2022年4月12日 20:11
收件人: Pan, Xinhui; amd-gfx@lists.freedesktop.org; Daniel Vetter
抄送: Deucher, Alexander
主题: Re: [PATCH] drm/amdgpu: Fix one use-after-free of VM

Am 12.04.22 um 14:03 schrieb xinhui pan:
> VM might already be freed when amdgpu_vm_tlb_seq_cb() is called.
> We see the calltrace below.
>
> Fix it by keeping the last flush fence around and wait for it to signal
>
> BUG kmalloc-4k (Not tainted): Poison overwritten
>
> 0xffff9c88630414e8-0xffff9c88630414e8 @offset=5352. First byte 0x6c
> instead of 0x6b Allocated in amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
> age=44 cpu=0 pid=2343
>   __slab_alloc.isra.0+0x4f/0x90
>   kmem_cache_alloc_trace+0x6b8/0x7a0
>   amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
>   drm_file_alloc+0x222/0x3e0 [drm]
>   drm_open+0x11d/0x410 [drm]
>   drm_stub_open+0xdc/0x230 [drm]
>   chrdev_open+0xa5/0x1e0
>   do_dentry_open+0x16c/0x3c0
>   vfs_open+0x2d/0x30
>   path_openat+0x70a/0xa90
>   do_filp_open+0xb2/0x120
>   do_sys_openat2+0x245/0x330
>   do_sys_open+0x46/0x80
>   __x64_sys_openat+0x20/0x30
>   do_syscall_64+0x38/0xc0
>   entry_SYSCALL_64_after_hwframe+0x44/0xae
> Freed in amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu] age=22 cpu=1
> pid=2485
>   kfree+0x4a2/0x580
>   amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu]
>   drm_file_free+0x24e/0x3c0 [drm]
>   drm_close_helper.isra.0+0x90/0xb0 [drm]
>   drm_release+0x97/0x1a0 [drm]
>   __fput+0xb6/0x280
>   ____fput+0xe/0x10
>   task_work_run+0x64/0xb0
>   do_exit+0x406/0xcf0
>   do_group_exit+0x50/0xc0
>   __x64_sys_exit_group+0x18/0x20
>   do_syscall_64+0x38/0xc0
>   entry_SYSCALL_64_after_hwframe+0x44/0xae
>
> Suggested-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
>   2 files changed, 20 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 645ce28277c2..e2486e95ca69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -932,9 +932,12 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>
>       if (flush_tlb || params.table_freed) {
>               tlb_cb->vm = vm;
> -             if (!fence || !*fence ||
> -                 dma_fence_add_callback(*fence, &tlb_cb->cb,
> -                                        amdgpu_vm_tlb_seq_cb))
> +             if (fence && *fence &&
> +                 !dma_fence_add_callback(*fence, &tlb_cb->cb,
> +                                        amdgpu_vm_tlb_seq_cb)) {
> +                     dma_fence_put(vm->last_delayed_tlb_flush);
> +                     vm->last_delayed_tlb_flush = dma_fence_get(*fence);
> +             } else
>                       amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
>               tlb_cb = NULL;
>       }
> @@ -2258,6 +2261,19 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>       dma_fence_wait(vm->last_unlocked, false);
>       dma_fence_put(vm->last_unlocked);
>
> +     if (vm->last_delayed_tlb_flush) {

You can initialize last_delayed_tlb_flush with the dummy fence; see
how last_unlocked works.

> +             /* Wait until fence is signaled.
> +              * But must double check to make sure fence cb is called.
> +              * As dma_fence_default_wait checks DMA_FENCE_FLAG_SIGNALED_BIT without
> +              * holding fence lock(the first test_bit).
> +              * So call dma_fence_get_status which will hold the fence lock.
> +              * Then we can make sure fence cb has been called.
> +              */

Uff, that is a really good point and most likely a bug in dma_fence_wait().

I'm pretty sure that a couple of other callers rely on that as well.

Daniel, what's your opinion about that?

Thanks,
Christian.

> +             (void)dma_fence_wait(vm->last_delayed_tlb_flush, false);
> +             (void)dma_fence_get_status(vm->last_delayed_tlb_flush);
> +             dma_fence_put(vm->last_delayed_tlb_flush);
> +     }
> +
>       list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>               if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>                       amdgpu_vm_prt_fini(adev, vm);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 1a814fbffff8..c1a48f5c1019 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -286,6 +286,7 @@ struct amdgpu_vm {
>
>       /* Last finished delayed update */
>       atomic64_t              tlb_seq;
> +     struct dma_fence        *last_delayed_tlb_flush;
>
>       /* Last unlocked submission to the scheduler entities */
>       struct dma_fence        *last_unlocked;


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: 回复: [PATCH] drm/amdgpu: Fix one use-after-free of VM
  2022-04-13  3:07   ` 回复: " Pan, Xinhui
@ 2022-04-13  8:34     ` Christian König
  0 siblings, 0 replies; 5+ messages in thread
From: Christian König @ 2022-04-13  8:34 UTC (permalink / raw)
  To: Pan, Xinhui, Koenig, Christian, amd-gfx, Daniel Vetter; +Cc: Deucher, Alexander

I think for now we should just have the following code in amdgpu_vm_fini:

dma_fence_wait(vm->last_tlb_flush, false);
/* Make sure that all fence callbacks have completed */
spinlock(vm->last_tlb_flush->lock);
spinunlock(vm->last_tlb_flush->lock);
dma_fence_put(vm->last_tlb_flush);

Cleaning that up in a larger context can come later on and is probably 
my job as DMA-buf maintainer.

Thanks,
Christian.

Am 13.04.22 um 05:07 schrieb Pan, Xinhui:
> [AMD Official Use Only]
>
> we make something like dma_fence_release does.
>
> @@ -783,11 +783,15 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
>          unsigned long flags;
>          signed long ret = timeout ? timeout : 1;
>
> -       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> +       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
> +           list_empty(&fence->cb_list))
>                  return ret;
>
>          spin_lock_irqsave(fence->lock, flags);
>
> +       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
> +               goto out;
> +
>          if (intr && signal_pending(current)) {
>                  ret = -ERESTARTSYS;
>                  goto out;
> ________________________________________
> 发件人: Koenig, Christian <Christian.Koenig@amd.com>
> 发送时间: 2022年4月12日 20:11
> 收件人: Pan, Xinhui; amd-gfx@lists.freedesktop.org; Daniel Vetter
> 抄送: Deucher, Alexander
> 主题: Re: [PATCH] drm/amdgpu: Fix one use-after-free of VM
>
> Am 12.04.22 um 14:03 schrieb xinhui pan:
>> VM might already be freed when amdgpu_vm_tlb_seq_cb() is called.
>> We see the calltrace below.
>>
>> Fix it by keeping the last flush fence around and wait for it to signal
>>
>> BUG kmalloc-4k (Not tainted): Poison overwritten
>>
>> 0xffff9c88630414e8-0xffff9c88630414e8 @offset=5352. First byte 0x6c
>> instead of 0x6b Allocated in amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
>> age=44 cpu=0 pid=2343
>>    __slab_alloc.isra.0+0x4f/0x90
>>    kmem_cache_alloc_trace+0x6b8/0x7a0
>>    amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
>>    drm_file_alloc+0x222/0x3e0 [drm]
>>    drm_open+0x11d/0x410 [drm]
>>    drm_stub_open+0xdc/0x230 [drm]
>>    chrdev_open+0xa5/0x1e0
>>    do_dentry_open+0x16c/0x3c0
>>    vfs_open+0x2d/0x30
>>    path_openat+0x70a/0xa90
>>    do_filp_open+0xb2/0x120
>>    do_sys_openat2+0x245/0x330
>>    do_sys_open+0x46/0x80
>>    __x64_sys_openat+0x20/0x30
>>    do_syscall_64+0x38/0xc0
>>    entry_SYSCALL_64_after_hwframe+0x44/0xae
>> Freed in amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu] age=22 cpu=1
>> pid=2485
>>    kfree+0x4a2/0x580
>>    amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu]
>>    drm_file_free+0x24e/0x3c0 [drm]
>>    drm_close_helper.isra.0+0x90/0xb0 [drm]
>>    drm_release+0x97/0x1a0 [drm]
>>    __fput+0xb6/0x280
>>    ____fput+0xe/0x10
>>    task_work_run+0x64/0xb0
>>    do_exit+0x406/0xcf0
>>    do_group_exit+0x50/0xc0
>>    __x64_sys_exit_group+0x18/0x20
>>    do_syscall_64+0x38/0xc0
>>    entry_SYSCALL_64_after_hwframe+0x44/0xae
>>
>> Suggested-by: Christian König <christian.koenig@amd.com>
>> Signed-off-by: xinhui pan <xinhui.pan@amd.com>
>> ---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++++---
>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
>>    2 files changed, 20 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 645ce28277c2..e2486e95ca69 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -932,9 +932,12 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>
>>        if (flush_tlb || params.table_freed) {
>>                tlb_cb->vm = vm;
>> -             if (!fence || !*fence ||
>> -                 dma_fence_add_callback(*fence, &tlb_cb->cb,
>> -                                        amdgpu_vm_tlb_seq_cb))
>> +             if (fence && *fence &&
>> +                 !dma_fence_add_callback(*fence, &tlb_cb->cb,
>> +                                        amdgpu_vm_tlb_seq_cb)) {
>> +                     dma_fence_put(vm->last_delayed_tlb_flush);
>> +                     vm->last_delayed_tlb_flush = dma_fence_get(*fence);
>> +             } else
>>                        amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
>>                tlb_cb = NULL;
>>        }
>> @@ -2258,6 +2261,19 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>>        dma_fence_wait(vm->last_unlocked, false);
>>        dma_fence_put(vm->last_unlocked);
>>
>> +     if (vm->last_delayed_tlb_flush) {
> You can initialize last_delayed_tlb_flush() with the dummy fence, see
> how last_unlocked works.
>
>> +             /* Wait until fence is signaled.
>> +              * But must double check to make sure fence cb is called.
>> +              * As dma_fence_default_wait checks DMA_FENCE_FLAG_SIGNALED_BIT without
>> +              * holding fence lock(the first test_bit).
>> +              * So call dma_fence_get_status which will hold the fence lock.
>> +              * Then we can make sure fence cb has been called.
>> +              */
> Uff, that is a really good point and most likely a bug in dma_fence_wait().
>
> I'm pretty sure that a couple of other callers rely on that as well.
>
> Daniel, what's your opinion about that?
>
> Thanks,
> Christian.
>
>> +             (void)dma_fence_wait(vm->last_delayed_tlb_flush, false);
>> +             (void)dma_fence_get_status(vm->last_delayed_tlb_flush);
>> +             dma_fence_put(vm->last_delayed_tlb_flush);
>> +     }
>> +
>>        list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
>>                if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
>>                        amdgpu_vm_prt_fini(adev, vm);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index 1a814fbffff8..c1a48f5c1019 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -286,6 +286,7 @@ struct amdgpu_vm {
>>
>>        /* Last finished delayed update */
>>        atomic64_t              tlb_seq;
>> +     struct dma_fence        *last_delayed_tlb_flush;
>>
>>        /* Last unlocked submission to the scheduler entities */
>>        struct dma_fence        *last_unlocked;


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] drm/amdgpu: Fix one use-after-free of VM
  2022-04-12 12:11 ` Christian König
  2022-04-13  3:07   ` 回复: " Pan, Xinhui
@ 2022-04-13 11:15   ` Daniel Vetter
  1 sibling, 0 replies; 5+ messages in thread
From: Daniel Vetter @ 2022-04-13 11:15 UTC (permalink / raw)
  To: Christian König; +Cc: alexander.deucher, xinhui pan, amd-gfx

On Tue, 12 Apr 2022 at 14:11, Christian König <christian.koenig@amd.com> wrote:
>
> Am 12.04.22 um 14:03 schrieb xinhui pan:
> > VM might already be freed when amdgpu_vm_tlb_seq_cb() is called.
> > We see the calltrace below.
> >
> > Fix it by keeping the last flush fence around and wait for it to signal
> >
> > BUG kmalloc-4k (Not tainted): Poison overwritten
> >
> > 0xffff9c88630414e8-0xffff9c88630414e8 @offset=5352. First byte 0x6c
> > instead of 0x6b Allocated in amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
> > age=44 cpu=0 pid=2343
> >   __slab_alloc.isra.0+0x4f/0x90
> >   kmem_cache_alloc_trace+0x6b8/0x7a0
> >   amdgpu_driver_open_kms+0x9d/0x360 [amdgpu]
> >   drm_file_alloc+0x222/0x3e0 [drm]
> >   drm_open+0x11d/0x410 [drm]
> >   drm_stub_open+0xdc/0x230 [drm]
> >   chrdev_open+0xa5/0x1e0
> >   do_dentry_open+0x16c/0x3c0
> >   vfs_open+0x2d/0x30
> >   path_openat+0x70a/0xa90
> >   do_filp_open+0xb2/0x120
> >   do_sys_openat2+0x245/0x330
> >   do_sys_open+0x46/0x80
> >   __x64_sys_openat+0x20/0x30
> >   do_syscall_64+0x38/0xc0
> >   entry_SYSCALL_64_after_hwframe+0x44/0xae
> > Freed in amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu] age=22 cpu=1
> > pid=2485
> >   kfree+0x4a2/0x580
> >   amdgpu_driver_postclose_kms+0x3e9/0x550 [amdgpu]
> >   drm_file_free+0x24e/0x3c0 [drm]
> >   drm_close_helper.isra.0+0x90/0xb0 [drm]
> >   drm_release+0x97/0x1a0 [drm]
> >   __fput+0xb6/0x280
> >   ____fput+0xe/0x10
> >   task_work_run+0x64/0xb0
> >   do_exit+0x406/0xcf0
> >   do_group_exit+0x50/0xc0
> >   __x64_sys_exit_group+0x18/0x20
> >   do_syscall_64+0x38/0xc0
> >   entry_SYSCALL_64_after_hwframe+0x44/0xae
> >
> > Suggested-by: Christian König <christian.koenig@amd.com>
> > Signed-off-by: xinhui pan <xinhui.pan@amd.com>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++++++++++++++++---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  1 +
> >   2 files changed, 20 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index 645ce28277c2..e2486e95ca69 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -932,9 +932,12 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> >
> >       if (flush_tlb || params.table_freed) {
> >               tlb_cb->vm = vm;
> > -             if (!fence || !*fence ||
> > -                 dma_fence_add_callback(*fence, &tlb_cb->cb,
> > -                                        amdgpu_vm_tlb_seq_cb))
> > +             if (fence && *fence &&
> > +                 !dma_fence_add_callback(*fence, &tlb_cb->cb,
> > +                                        amdgpu_vm_tlb_seq_cb)) {
> > +                     dma_fence_put(vm->last_delayed_tlb_flush);
> > +                     vm->last_delayed_tlb_flush = dma_fence_get(*fence);
> > +             } else
> >                       amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
> >               tlb_cb = NULL;
> >       }
> > @@ -2258,6 +2261,19 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> >       dma_fence_wait(vm->last_unlocked, false);
> >       dma_fence_put(vm->last_unlocked);
> >
> > +     if (vm->last_delayed_tlb_flush) {
>
> You can initialize last_delayed_tlb_flush() with the dummy fence, see
> how last_unlocked works.
>
> > +             /* Wait until fence is signaled.
> > +              * But must double check to make sure fence cb is called.
> > +              * As dma_fence_default_wait checks DMA_FENCE_FLAG_SIGNALED_BIT without
> > +              * holding fence lock(the first test_bit).
> > +              * So call dma_fence_get_status which will hold the fence lock.
> > +              * Then we can make sure fence cb has been called.
> > +              */
>
> Uff, that is a really good point and most likely a bug in dma_fence_wait().
>
> I'm pretty sure that a couple of other callers rely on that as well.
>
> > Daniel, what's your opinion about that?

dma_fence_wait + dma_fence_signal provide a barrier (like the other
wake_up/wait function pairs we have), but nothing more. So you're not
really guaranteed at all that any other waiters have received the
wakeup. This is also why we had that wording that waiters need to hold
a dma_fence reference or things can go boom. This is also standard
linux and I have honestly no idea how we could guarantee more without
either making this all more expensive (forcing more refcounting would
probably be needed) or making it look like there's a guarantee that
really doesn't hold when you try to use it. wait/wake_up functions
pair really should not provide more ordering than just the barrier
(and that barrier is even conditional on "an actual wake-up has
happened").

I'm not exactly sure how to best fix this here, but I guess you either
want your own spinlock to protect the link between the fence and the
vm, or some other refcounting scheme changes (like you have the gpu
ctx that run on top of a vm hold the refence on the fence, and the
fence itself holds a full reference on the vm) to make sure there's
not use after free here.

I don't think the spinlock fence you propose below is enough, I think
you also need to protect any vm dereference from under that spinlock
(i.e. set some vm pointer to NULL while holding that spinlock, or
whatever you need to do to unlink the fence from the vm).
-Daniel

>
> Thanks,
> Christian.
>
> > +             (void)dma_fence_wait(vm->last_delayed_tlb_flush, false);
> > +             (void)dma_fence_get_status(vm->last_delayed_tlb_flush);
> > +             dma_fence_put(vm->last_delayed_tlb_flush);
> > +     }
> > +
> >       list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
> >               if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
> >                       amdgpu_vm_prt_fini(adev, vm);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > index 1a814fbffff8..c1a48f5c1019 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > @@ -286,6 +286,7 @@ struct amdgpu_vm {
> >
> >       /* Last finished delayed update */
> >       atomic64_t              tlb_seq;
> > +     struct dma_fence        *last_delayed_tlb_flush;
> >
> >       /* Last unlocked submission to the scheduler entities */
> >       struct dma_fence        *last_unlocked;
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2022-04-13 11:15 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-12 12:03 [PATCH] drm/amdgpu: Fix one use-after-free of VM xinhui pan
2022-04-12 12:11 ` Christian König
2022-04-13  3:07   ` 回复: " Pan, Xinhui
2022-04-13  8:34     ` Christian König
2022-04-13 11:15   ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).