linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Christian König" <christian.koenig@amd.com>
To: "Mikel Rychliski" <mikel@mikelr.com>,
	"Alex Deucher" <alexander.deucher@amd.com>,
	"Pan, Xinhui" <Xinhui.Pan@amd.com>,
	"David Airlie" <airlied@linux.ie>,
	"Daniel Vetter" <daniel@ffwll.ch>,
	"Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	amd-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3] drm/radeon: Fix NULL dereference when updating memory stats
Date: Wed, 30 Jun 2021 11:55:09 +0200	[thread overview]
Message-ID: <18a31cb0-9dae-0dc2-e33b-7ec555b586a8@amd.com> (raw)
In-Reply-To: <20210624045121.15643-1-mikel@mikelr.com>

Am 24.06.21 um 06:51 schrieb Mikel Rychliski:
> radeon_ttm_bo_destroy() is attempting to access the resource object to
> update memory counters. However, the resource object is already freed when
> ttm calls this function via the destroy callback. This causes an oops when
> a bo is freed:
>
> 	BUG: kernel NULL pointer dereference, address: 0000000000000010
> 	RIP: 0010:radeon_ttm_bo_destroy+0x2c/0x100 [radeon]
> 	Call Trace:
> 	 radeon_bo_unref+0x1a/0x30 [radeon]
> 	 radeon_gem_object_free+0x33/0x50 [radeon]
> 	 drm_gem_object_release_handle+0x69/0x70 [drm]
> 	 drm_gem_handle_delete+0x62/0xa0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 drm_ioctl_kernel+0xb2/0xf0 [drm]
> 	 drm_ioctl+0x30a/0x3c0 [drm]
> 	 ? drm_mode_destroy_dumb+0x40/0x40 [drm]
> 	 radeon_drm_ioctl+0x49/0x80 [radeon]
> 	 __x64_sys_ioctl+0x8e/0xd0
>
> Avoid the issue by updating the counters in the delete_mem_notify callback
> instead. Also, fix memory statistic updating in radeon_bo_move() to
> identify the source type correctly. The source type needs to be saved
> before the move, because the moved from object may be altered by the move.
>
> Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2")
> Signed-off-by: Mikel Rychliski <mikel@mikelr.com>

So, back from vacation. I've reviewed and pushed the patch to drm-misc-next.

Thanks for the help,
Christian.

> ---
>   drivers/gpu/drm/radeon/radeon_object.c | 29 ++++++++++++-----------------
>   drivers/gpu/drm/radeon/radeon_object.h |  2 +-
>   drivers/gpu/drm/radeon/radeon_ttm.c    | 13 ++++++++++---
>   3 files changed, 23 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
> index bfaaa3c969a3..56ede9d63b12 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.c
> +++ b/drivers/gpu/drm/radeon/radeon_object.c
> @@ -49,23 +49,23 @@ static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);
>    * function are calling it.
>    */
>   
> -static void radeon_update_memory_usage(struct radeon_bo *bo,
> -				       unsigned mem_type, int sign)
> +static void radeon_update_memory_usage(struct ttm_buffer_object *bo,
> +				       unsigned int mem_type, int sign)
>   {
> -	struct radeon_device *rdev = bo->rdev;
> +	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
>   
>   	switch (mem_type) {
>   	case TTM_PL_TT:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_add(bo->base.size, &rdev->gtt_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->gtt_usage);
> +			atomic64_sub(bo->base.size, &rdev->gtt_usage);
>   		break;
>   	case TTM_PL_VRAM:
>   		if (sign > 0)
> -			atomic64_add(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_add(bo->base.size, &rdev->vram_usage);
>   		else
> -			atomic64_sub(bo->tbo.base.size, &rdev->vram_usage);
> +			atomic64_sub(bo->base.size, &rdev->vram_usage);
>   		break;
>   	}
>   }
> @@ -76,8 +76,6 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
>   
>   	bo = container_of(tbo, struct radeon_bo, tbo);
>   
> -	radeon_update_memory_usage(bo, bo->tbo.resource->mem_type, -1);
> -
>   	mutex_lock(&bo->rdev->gem.mutex);
>   	list_del_init(&bo->list);
>   	mutex_unlock(&bo->rdev->gem.mutex);
> @@ -727,24 +725,21 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   }
>   
>   void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -			   bool evict,
> +			   unsigned int old_type,
>   			   struct ttm_resource *new_mem)
>   {
>   	struct radeon_bo *rbo;
>   
> +	radeon_update_memory_usage(bo, old_type, -1);
> +	if (new_mem)
> +		radeon_update_memory_usage(bo, new_mem->mem_type, 1);
> +
>   	if (!radeon_ttm_bo_is_radeon_bo(bo))
>   		return;
>   
>   	rbo = container_of(bo, struct radeon_bo, tbo);
>   	radeon_bo_check_tiling(rbo, 0, 1);
>   	radeon_vm_bo_invalidate(rbo->rdev, rbo);
> -
> -	/* update statistics */
> -	if (!new_mem)
> -		return;
> -
> -	radeon_update_memory_usage(rbo, bo->resource->mem_type, -1);
> -	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
>   }
>   
>   vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index 1739c6a142cd..1afc7992ef91 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -161,7 +161,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
>   extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
>   				bool force_drop);
>   extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
> -				  bool evict,
> +				  unsigned int old_type,
>   				  struct ttm_resource *new_mem);
>   extern vm_fault_t radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
>   extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);
> diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
> index ad2a5a791bba..a06d4cc2fb1c 100644
> --- a/drivers/gpu/drm/radeon/radeon_ttm.c
> +++ b/drivers/gpu/drm/radeon/radeon_ttm.c
> @@ -199,7 +199,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	struct ttm_resource *old_mem = bo->resource;
>   	struct radeon_device *rdev;
>   	struct radeon_bo *rbo;
> -	int r;
> +	int r, old_type;
>   
>   	if (new_mem->mem_type == TTM_PL_TT) {
>   		r = radeon_ttm_tt_bind(bo->bdev, bo->ttm, new_mem);
> @@ -216,6 +216,9 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   	if (WARN_ON_ONCE(rbo->tbo.pin_count > 0))
>   		return -EINVAL;
>   
> +	/* Save old type for statistics update */
> +	old_type = old_mem->mem_type;
> +
>   	rdev = radeon_get_rdev(bo->bdev);
>   	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
>   		ttm_bo_move_null(bo, new_mem);
> @@ -261,7 +264,7 @@ static int radeon_bo_move(struct ttm_buffer_object *bo, bool evict,
>   out:
>   	/* update statistics */
>   	atomic64_add(bo->base.size, &rdev->num_bytes_moved);
> -	radeon_bo_move_notify(bo, evict, new_mem);
> +	radeon_bo_move_notify(bo, old_type, new_mem);
>   	return 0;
>   }
>   
> @@ -682,7 +685,11 @@ bool radeon_ttm_tt_is_readonly(struct radeon_device *rdev,
>   static void
>   radeon_bo_delete_mem_notify(struct ttm_buffer_object *bo)
>   {
> -	radeon_bo_move_notify(bo, false, NULL);
> +	unsigned int old_type = TTM_PL_SYSTEM;
> +
> +	if (bo->resource)
> +		old_type = bo->resource->mem_type;
> +	radeon_bo_move_notify(bo, old_type, NULL);
>   }
>   
>   static struct ttm_device_funcs radeon_bo_driver = {


  reply	other threads:[~2021-06-30  9:55 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-22 21:26 [PATCH v2] drm/radeon: Fix NULL dereference when updating memory stats Mikel Rychliski
2021-06-23  6:55 ` Christian König
2021-06-24  4:51   ` [PATCH v3] " Mikel Rychliski
2021-06-30  9:55     ` Christian König [this message]
2021-06-24  4:52   ` [PATCH v2] " Mikel Rychliski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=18a31cb0-9dae-0dc2-e33b-7ec555b586a8@amd.com \
    --to=christian.koenig@amd.com \
    --cc=Xinhui.Pan@amd.com \
    --cc=airlied@linux.ie \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=daniel@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mikel@mikelr.com \
    --cc=thomas.hellstrom@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).