All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-27 14:19 ` Thomas Hellström
  0 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-27 14:19 UTC (permalink / raw)
  To: intel-gfx, dri-devel, christian.koenig; +Cc: Thomas Hellström

The swapping code was dereferencing bo->ttm pointers without holding the
dma-resv lock. It might also try to swap out unpopulated bos.

Fix this by deferring the bo->ttm dereference until we hold the
reservation lock. Check that the ttm_tt is populated after the
swap_notify callback.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
 drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9f53506a82fc..86213d37657b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
 		return -EBUSY;
 
+	dma_resv_assert_held(bo->base.resv);
+
+	if (!bo->ttm ||
+	    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
+	    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
+		if (locked)
+			dma_resv_unlock(bo->base.resv);
+		return -EBUSY;
+	}
+
 	if (!ttm_bo_get_unless_zero(bo)) {
 		if (locked)
 			dma_resv_unlock(bo->base.resv);
@@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	if (bo->bdev->funcs->swap_notify)
 		bo->bdev->funcs->swap_notify(bo);
 
-	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
+	if (ttm_tt_is_populated(bo->ttm))
+		ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
 out:
 
 	/*
@@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	if (locked)
 		dma_resv_unlock(bo->base.resv);
 	ttm_bo_put(bo);
+
+	/* Don't break locking rules. */
+	WARN_ON(ret == -EBUSY);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 460953dcad11..eaa7487ae404 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 
 		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
 			list_for_each_entry(bo, &man->lru[j], lru) {
-				uint32_t num_pages;
+				pgoff_t num_pages;
 
-				if (!bo->ttm ||
-				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
-				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
+				if (!READ_ONCE(bo->ttm))
 					continue;
 
-				num_pages = bo->ttm->num_pages;
+				num_pages = bo->base.size >> PAGE_SHIFT;
 				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
 				/* ttm_bo_swapout has dropped the lru_lock */
 				if (!ret)
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-27 14:19 ` Thomas Hellström
  0 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-27 14:19 UTC (permalink / raw)
  To: intel-gfx, dri-devel, christian.koenig; +Cc: Thomas Hellström

The swapping code was dereferencing bo->ttm pointers without holding the
dma-resv lock. It might also try to swap out unpopulated bos.

Fix this by deferring the bo->ttm dereference until we hold the
reservation lock. Check that the ttm_tt is populated after the
swap_notify callback.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
 drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9f53506a82fc..86213d37657b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
 		return -EBUSY;
 
+	dma_resv_assert_held(bo->base.resv);
+
+	if (!bo->ttm ||
+	    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
+	    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
+		if (locked)
+			dma_resv_unlock(bo->base.resv);
+		return -EBUSY;
+	}
+
 	if (!ttm_bo_get_unless_zero(bo)) {
 		if (locked)
 			dma_resv_unlock(bo->base.resv);
@@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	if (bo->bdev->funcs->swap_notify)
 		bo->bdev->funcs->swap_notify(bo);
 
-	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
+	if (ttm_tt_is_populated(bo->ttm))
+		ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
 out:
 
 	/*
@@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	if (locked)
 		dma_resv_unlock(bo->base.resv);
 	ttm_bo_put(bo);
+
+	/* Don't break locking rules. */
+	WARN_ON(ret == -EBUSY);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 460953dcad11..eaa7487ae404 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 
 		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
 			list_for_each_entry(bo, &man->lru[j], lru) {
-				uint32_t num_pages;
+				pgoff_t num_pages;
 
-				if (!bo->ttm ||
-				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
-				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
+				if (!READ_ONCE(bo->ttm))
 					continue;
 
-				num_pages = bo->ttm->num_pages;
+				num_pages = bo->base.size >> PAGE_SHIFT;
 				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
 				/* ttm_bo_swapout has dropped the lru_lock */
 				if (!ret)
-- 
2.31.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-27 14:19 ` [Intel-gfx] " Thomas Hellström
@ 2021-05-27 14:54   ` Christian König
  -1 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-27 14:54 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> The swapping code was dereference bo->ttm pointers without having the
> dma-resv lock held. Also it might try to swap out unpopulated bos.
>
> Fix this by moving the bo->ttm dereference until we have the reservation
> lock. Check that the ttm_tt is populated after the swap_notify callback.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
>   drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>   2 files changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 9f53506a82fc..86213d37657b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>   	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
>   		return -EBUSY;
>   
> +	dma_resv_assert_held(bo->base.resv);
> +
> +	if (!bo->ttm ||
> +	    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> +	    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
> +		if (locked)
> +			dma_resv_unlock(bo->base.resv);
> +		return -EBUSY;
> +	}
> +
>   	if (!ttm_bo_get_unless_zero(bo)) {
>   		if (locked)
>   			dma_resv_unlock(bo->base.resv);
> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>   	if (bo->bdev->funcs->swap_notify)
>   		bo->bdev->funcs->swap_notify(bo);
>   
> -	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> +	if (ttm_tt_is_populated(bo->ttm))
> +		ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);

That is exactly what I would not recommend. With that, we would try to
swap out the same BO over and over again.

Why not move that to the check above as well?

Christian.

>   out:
>   
>   	/*
> @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>   	if (locked)
>   		dma_resv_unlock(bo->base.resv);
>   	ttm_bo_put(bo);
> +
> +	/* Don't break locking rules. */
> +	WARN_ON(ret == -EBUSY);
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 460953dcad11..eaa7487ae404 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>   
>   		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
>   			list_for_each_entry(bo, &man->lru[j], lru) {
> -				uint32_t num_pages;
> +				pgoff_t num_pages;
>   
> -				if (!bo->ttm ||
> -				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> -				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
> +				if (!READ_ONCE(bo->ttm))
>   					continue;
>   
> -				num_pages = bo->ttm->num_pages;
> +				num_pages = bo->base.size >> PAGE_SHIFT;
>   				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
>   				/* ttm_bo_swapout has dropped the lru_lock */
>   				if (!ret)


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-27 14:54   ` Christian König
  0 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-27 14:54 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> The swapping code was dereference bo->ttm pointers without having the
> dma-resv lock held. Also it might try to swap out unpopulated bos.
>
> Fix this by moving the bo->ttm dereference until we have the reservation
> lock. Check that the ttm_tt is populated after the swap_notify callback.
>
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
>   drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>   2 files changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 9f53506a82fc..86213d37657b 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>   	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL))
>   		return -EBUSY;
>   
> +	dma_resv_assert_held(bo->base.resv);
> +
> +	if (!bo->ttm ||
> +	    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> +	    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
> +		if (locked)
> +			dma_resv_unlock(bo->base.resv);
> +		return -EBUSY;
> +	}
> +
>   	if (!ttm_bo_get_unless_zero(bo)) {
>   		if (locked)
>   			dma_resv_unlock(bo->base.resv);
> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>   	if (bo->bdev->funcs->swap_notify)
>   		bo->bdev->funcs->swap_notify(bo);
>   
> -	ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> +	if (ttm_tt_is_populated(bo->ttm))
> +		ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);

That is exactly what I would not recommend. With that, we would try to
swap out the same BO over and over again.

Why not move that to the check above as well?

Christian.

>   out:
>   
>   	/*
> @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
>   	if (locked)
>   		dma_resv_unlock(bo->base.resv);
>   	ttm_bo_put(bo);
> +
> +	/* Don't break locking rules. */
> +	WARN_ON(ret == -EBUSY);
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
> index 460953dcad11..eaa7487ae404 100644
> --- a/drivers/gpu/drm/ttm/ttm_device.c
> +++ b/drivers/gpu/drm/ttm/ttm_device.c
> @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
>   
>   		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
>   			list_for_each_entry(bo, &man->lru[j], lru) {
> -				uint32_t num_pages;
> +				pgoff_t num_pages;
>   
> -				if (!bo->ttm ||
> -				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> -				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
> +				if (!READ_ONCE(bo->ttm))
>   					continue;
>   
> -				num_pages = bo->ttm->num_pages;
> +				num_pages = bo->base.size >> PAGE_SHIFT;
>   				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
>   				/* ttm_bo_swapout has dropped the lru_lock */
>   				if (!ret)

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-27 14:54   ` [Intel-gfx] " Christian König
@ 2021-05-27 15:01     ` Thomas Hellström
  -1 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-27 15:01 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel

On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> > The swapping code was dereference bo->ttm pointers without having
> > the
> > dma-resv lock held. Also it might try to swap out unpopulated bos.
> > 
> > Fix this by moving the bo->ttm dereference until we have the
> > reservation
> > lock. Check that the ttm_tt is populated after the swap_notify
> > callback.
> > 
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> >   drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
> >   drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
> >   2 files changed, 18 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 9f53506a82fc..86213d37657b 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > *bo, struct ttm_operation_ctx *ctx,
> >         if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
> > &locked, NULL))
> >                 return -EBUSY;
> >   
> > +       dma_resv_assert_held(bo->base.resv);
> > +
> > +       if (!bo->ttm ||
> > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
> > +               if (locked)
> > +                       dma_resv_unlock(bo->base.resv);
> > +               return -EBUSY;
> > +       }
> > +
> >         if (!ttm_bo_get_unless_zero(bo)) {
> >                 if (locked)
> >                         dma_resv_unlock(bo->base.resv);
> > @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > *bo, struct ttm_operation_ctx *ctx,
> >         if (bo->bdev->funcs->swap_notify)
> >                 bo->bdev->funcs->swap_notify(bo);
> >   
> > -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> > +       if (ttm_tt_is_populated(bo->ttm))
> > +               ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> 
> Exactly that is what I won't recommend. We would try to swap out the 
> same BO over and over again with that.

But we wouldn't, since the BO is taken off the LRU and never re-added.

> 
> Why not move that to the check above as well?

Because the BO may become unpopulated in swap_notify(). i915, like
vmwgfx, sometimes sets up GPU bindings from system, and when we get a
notification from user-space that those are purgeable, we don't want to
purge immediately, but rather wait for a potential swapout.

/Thomas


> 
> Christian.
> 
> >   out:
> >   
> >         /*
> > @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > *bo, struct ttm_operation_ctx *ctx,
> >         if (locked)
> >                 dma_resv_unlock(bo->base.resv);
> >         ttm_bo_put(bo);
> > +
> > +       /* Don't break locking rules. */
> > +       WARN_ON(ret == -EBUSY);
> >         return ret;
> >   }
> >   
> > diff --git a/drivers/gpu/drm/ttm/ttm_device.c
> > b/drivers/gpu/drm/ttm/ttm_device.c
> > index 460953dcad11..eaa7487ae404 100644
> > --- a/drivers/gpu/drm/ttm/ttm_device.c
> > +++ b/drivers/gpu/drm/ttm/ttm_device.c
> > @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device
> > *bdev, struct ttm_operation_ctx *ctx,
> >   
> >                 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> >                         list_for_each_entry(bo, &man->lru[j], lru)
> > {
> > -                               uint32_t num_pages;
> > +                               pgoff_t num_pages;
> >   
> > -                               if (!bo->ttm ||
> > -                                   bo->ttm->page_flags &
> > TTM_PAGE_FLAG_SG ||
> > -                                   bo->ttm->page_flags &
> > TTM_PAGE_FLAG_SWAPPED)
> > +                               if (!READ_ONCE(bo->ttm))
> >                                         continue;
> >   
> > -                               num_pages = bo->ttm->num_pages;
> > +                               num_pages = bo->base.size >>
> > PAGE_SHIFT;
> >                                 ret = ttm_bo_swapout(bo, ctx,
> > gfp_flags);
> >                                 /* ttm_bo_swapout has dropped the
> > lru_lock */
> >                                 if (!ret)
> 



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-27 15:01     ` Thomas Hellström
  0 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-27 15:01 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel

On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> > The swapping code was dereference bo->ttm pointers without having
> > the
> > dma-resv lock held. Also it might try to swap out unpopulated bos.
> > 
> > Fix this by moving the bo->ttm dereference until we have the
> > reservation
> > lock. Check that the ttm_tt is populated after the swap_notify
> > callback.
> > 
> > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > ---
> >   drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
> >   drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
> >   2 files changed, 18 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 9f53506a82fc..86213d37657b 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > *bo, struct ttm_operation_ctx *ctx,
> >         if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
> > &locked, NULL))
> >                 return -EBUSY;
> >   
> > +       dma_resv_assert_held(bo->base.resv);
> > +
> > +       if (!bo->ttm ||
> > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
> > +               if (locked)
> > +                       dma_resv_unlock(bo->base.resv);
> > +               return -EBUSY;
> > +       }
> > +
> >         if (!ttm_bo_get_unless_zero(bo)) {
> >                 if (locked)
> >                         dma_resv_unlock(bo->base.resv);
> > @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > *bo, struct ttm_operation_ctx *ctx,
> >         if (bo->bdev->funcs->swap_notify)
> >                 bo->bdev->funcs->swap_notify(bo);
> >   
> > -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> > +       if (ttm_tt_is_populated(bo->ttm))
> > +               ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> 
> Exactly that is what I won't recommend. We would try to swap out the 
> same BO over and over again with that.

But we wouldn't, since the BO is taken off the LRU and never re-added.

> 
> Why not move that to the check above as well?

Because the BO may become unpopulated in swap_notify(). i915, like
vmwgfx, sometimes sets up GPU bindings from system, and when we get a
notification from user-space that those are purgeable, we don't want to
purge immediately, but rather wait for a potential swapout.

/Thomas


> 
> Christian.
> 
> >   out:
> >   
> >         /*
> > @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > *bo, struct ttm_operation_ctx *ctx,
> >         if (locked)
> >                 dma_resv_unlock(bo->base.resv);
> >         ttm_bo_put(bo);
> > +
> > +       /* Don't break locking rules. */
> > +       WARN_ON(ret == -EBUSY);
> >         return ret;
> >   }
> >   
> > diff --git a/drivers/gpu/drm/ttm/ttm_device.c
> > b/drivers/gpu/drm/ttm/ttm_device.c
> > index 460953dcad11..eaa7487ae404 100644
> > --- a/drivers/gpu/drm/ttm/ttm_device.c
> > +++ b/drivers/gpu/drm/ttm/ttm_device.c
> > @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device
> > *bdev, struct ttm_operation_ctx *ctx,
> >   
> >                 for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
> >                         list_for_each_entry(bo, &man->lru[j], lru)
> > {
> > -                               uint32_t num_pages;
> > +                               pgoff_t num_pages;
> >   
> > -                               if (!bo->ttm ||
> > -                                   bo->ttm->page_flags &
> > TTM_PAGE_FLAG_SG ||
> > -                                   bo->ttm->page_flags &
> > TTM_PAGE_FLAG_SWAPPED)
> > +                               if (!READ_ONCE(bo->ttm))
> >                                         continue;
> >   
> > -                               num_pages = bo->ttm->num_pages;
> > +                               num_pages = bo->base.size >>
> > PAGE_SHIFT;
> >                                 ret = ttm_bo_swapout(bo, ctx,
> > gfp_flags);
> >                                 /* ttm_bo_swapout has dropped the
> > lru_lock */
> >                                 if (!ret)
> 


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-27 15:01     ` [Intel-gfx] " Thomas Hellström
@ 2021-05-27 15:05       ` Thomas Hellström
  -1 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-27 15:05 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel

On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
> > Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> > > The swapping code was dereference bo->ttm pointers without having
> > > the
> > > dma-resv lock held. Also it might try to swap out unpopulated
> > > bos.
> > > 
> > > Fix this by moving the bo->ttm dereference until we have the
> > > reservation
> > > lock. Check that the ttm_tt is populated after the swap_notify
> > > callback.
> > > 
> > > Signed-off-by: Thomas Hellström
> > > <thomas.hellstrom@linux.intel.com>
> > > ---
> > >   drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
> > >   drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
> > >   2 files changed, 18 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > > b/drivers/gpu/drm/ttm/ttm_bo.c
> > > index 9f53506a82fc..86213d37657b 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > > @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
> > > ttm_buffer_object
> > > *bo, struct ttm_operation_ctx *ctx,
> > >         if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
> > > &locked, NULL))
> > >                 return -EBUSY;
> > >   
> > > +       dma_resv_assert_held(bo->base.resv);
> > > +
> > > +       if (!bo->ttm ||
> > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
> > > +               if (locked)
> > > +                       dma_resv_unlock(bo->base.resv);
> > > +               return -EBUSY;
> > > +       }
> > > +
> > >         if (!ttm_bo_get_unless_zero(bo)) {
> > >                 if (locked)
> > >                         dma_resv_unlock(bo->base.resv);
> > > @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > > *bo, struct ttm_operation_ctx *ctx,
> > >         if (bo->bdev->funcs->swap_notify)
> > >                 bo->bdev->funcs->swap_notify(bo);
> > >   
> > > -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> > > +       if (ttm_tt_is_populated(bo->ttm))
> > > +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
> > > gfp_flags);
> > 
> > Exactly that is what I won't recommend. We would try to swap out
> > the 
> > same BO over and over again with that.
> 
> But we wouldn't since the BO is taken off the LRU and never re-added,
> 
> 
In fact, we probably want to take the !bo->ttm bos off the LRU as
well.

/Thomas


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-27 15:05       ` Thomas Hellström
  0 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-27 15:05 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel

On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
> > Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> > > The swapping code was dereference bo->ttm pointers without having
> > > the
> > > dma-resv lock held. Also it might try to swap out unpopulated
> > > bos.
> > > 
> > > Fix this by moving the bo->ttm dereference until we have the
> > > reservation
> > > lock. Check that the ttm_tt is populated after the swap_notify
> > > callback.
> > > 
> > > Signed-off-by: Thomas Hellström
> > > <thomas.hellstrom@linux.intel.com>
> > > ---
> > >   drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
> > >   drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
> > >   2 files changed, 18 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > > b/drivers/gpu/drm/ttm/ttm_bo.c
> > > index 9f53506a82fc..86213d37657b 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > > @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
> > > ttm_buffer_object
> > > *bo, struct ttm_operation_ctx *ctx,
> > >         if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
> > > &locked, NULL))
> > >                 return -EBUSY;
> > >   
> > > +       dma_resv_assert_held(bo->base.resv);
> > > +
> > > +       if (!bo->ttm ||
> > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
> > > +               if (locked)
> > > +                       dma_resv_unlock(bo->base.resv);
> > > +               return -EBUSY;
> > > +       }
> > > +
> > >         if (!ttm_bo_get_unless_zero(bo)) {
> > >                 if (locked)
> > >                         dma_resv_unlock(bo->base.resv);
> > > @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object
> > > *bo, struct ttm_operation_ctx *ctx,
> > >         if (bo->bdev->funcs->swap_notify)
> > >                 bo->bdev->funcs->swap_notify(bo);
> > >   
> > > -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> > > +       if (ttm_tt_is_populated(bo->ttm))
> > > +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
> > > gfp_flags);
> > 
> > Exactly that is what I won't recommend. We would try to swap out
> > the 
> > same BO over and over again with that.
> 
> But we wouldn't since the BO is taken off the LRU and never re-added,
> 
> 
In fact, we probably want to take the !bo->ttm bos off the LRU as
well.

/Thomas

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-27 15:01     ` [Intel-gfx] " Thomas Hellström
@ 2021-05-27 15:17       ` Christian König
  -1 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-27 15:17 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 27.05.21 um 17:01 schrieb Thomas Hellström:
> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>> The swapping code was dereference bo->ttm pointers without having
>>> the
>>> dma-resv lock held. Also it might try to swap out unpopulated bos.
>>>
>>> Fix this by moving the bo->ttm dereference until we have the
>>> reservation
>>> lock. Check that the ttm_tt is populated after the swap_notify
>>> callback.
>>>
>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
>>>    drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>    2 files changed, 18 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 9f53506a82fc..86213d37657b 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object
>>> *bo, struct ttm_operation_ctx *ctx,
>>>          if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
>>> &locked, NULL))
>>>                  return -EBUSY;
>>>    
>>> +       dma_resv_assert_held(bo->base.resv);
>>> +
>>> +       if (!bo->ttm ||
>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
>>> +               if (locked)
>>> +                       dma_resv_unlock(bo->base.resv);
>>> +               return -EBUSY;
>>> +       }
>>> +
>>>          if (!ttm_bo_get_unless_zero(bo)) {
>>>                  if (locked)
>>>                          dma_resv_unlock(bo->base.resv);
>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object
>>> *bo, struct ttm_operation_ctx *ctx,
>>>          if (bo->bdev->funcs->swap_notify)
>>>                  bo->bdev->funcs->swap_notify(bo);
>>>    
>>> -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>>> +       if (ttm_tt_is_populated(bo->ttm))
>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>> Exactly that is what I won't recommend. We would try to swap out the
>> same BO over and over again with that.
> But we wouldn't since the BO is taken off the LRU and never re-added,

Well then that would be a bug in itself.

>> Why not move that to the check above as well?
> Because the BO may become unpopulated in swap_notify(), i915, like
> vmwgfx, sometimes sets up gpu bindings from system, and when we get a
> notification from user-space that those are purgeable, we don't want to
> purge immediately but wait for a potential swapout.

Uff, good point. But then we need to check that at both locations I think.

Because populating the TT object currently doesn't put the BO back on 
the LRU eventually.

Christian.

>
> /Thomas
>
>
>> Christian.
>>
>>>    out:
>>>    
>>>          /*
>>> @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object
>>> *bo, struct ttm_operation_ctx *ctx,
>>>          if (locked)
>>>                  dma_resv_unlock(bo->base.resv);
>>>          ttm_bo_put(bo);
>>> +
>>> +       /* Don't break locking rules. */
>>> +       WARN_ON(ret == -EBUSY);
>>>          return ret;
>>>    }
>>>    
>>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c
>>> b/drivers/gpu/drm/ttm/ttm_device.c
>>> index 460953dcad11..eaa7487ae404 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>>> @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device
>>> *bdev, struct ttm_operation_ctx *ctx,
>>>    
>>>                  for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
>>>                          list_for_each_entry(bo, &man->lru[j], lru)
>>> {
>>> -                               uint32_t num_pages;
>>> +                               pgoff_t num_pages;
>>>    
>>> -                               if (!bo->ttm ||
>>> -                                   bo->ttm->page_flags &
>>> TTM_PAGE_FLAG_SG ||
>>> -                                   bo->ttm->page_flags &
>>> TTM_PAGE_FLAG_SWAPPED)
>>> +                               if (!READ_ONCE(bo->ttm))
>>>                                          continue;
>>>    
>>> -                               num_pages = bo->ttm->num_pages;
>>> +                               num_pages = bo->base.size >>
>>> PAGE_SHIFT;
>>>                                  ret = ttm_bo_swapout(bo, ctx,
>>> gfp_flags);
>>>                                  /* ttm_bo_swapout has dropped the
>>> lru_lock */
>>>                                  if (!ret)
>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-27 15:17       ` Christian König
  0 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-27 15:17 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 27.05.21 um 17:01 schrieb Thomas Hellström:
> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>> The swapping code was dereference bo->ttm pointers without having
>>> the
>>> dma-resv lock held. Also it might try to swap out unpopulated bos.
>>>
>>> Fix this by moving the bo->ttm dereference until we have the
>>> reservation
>>> lock. Check that the ttm_tt is populated after the swap_notify
>>> callback.
>>>
>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
>>>    drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>    2 files changed, 18 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 9f53506a82fc..86213d37657b 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct ttm_buffer_object
>>> *bo, struct ttm_operation_ctx *ctx,
>>>          if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
>>> &locked, NULL))
>>>                  return -EBUSY;
>>>    
>>> +       dma_resv_assert_held(bo->base.resv);
>>> +
>>> +       if (!bo->ttm ||
>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
>>> +               if (locked)
>>> +                       dma_resv_unlock(bo->base.resv);
>>> +               return -EBUSY;
>>> +       }
>>> +
>>>          if (!ttm_bo_get_unless_zero(bo)) {
>>>                  if (locked)
>>>                          dma_resv_unlock(bo->base.resv);
>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object
>>> *bo, struct ttm_operation_ctx *ctx,
>>>          if (bo->bdev->funcs->swap_notify)
>>>                  bo->bdev->funcs->swap_notify(bo);
>>>    
>>> -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>>> +       if (ttm_tt_is_populated(bo->ttm))
>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>> Exactly that is what I won't recommend. We would try to swap out the
>> same BO over and over again with that.
> But we wouldn't since the BO is taken off the LRU and never re-added,

Well then that would be a bug in itself.

>> Why not move that to the check above as well?
> Because the BO may become unpopulated in swap_notify(), i915, like
> vmwgfx, sometimes sets up gpu bindings from system, and when we get a
> notification from user-space that those are purgeable, we don't want to
> purge immediately but wait for a potential swapout.

Uff, good point. But then we need to check that at both locations I think.

Because populating the TT object currently doesn't put the BO back on 
the LRU eventually.

Christian.

>
> /Thomas
>
>
>> Christian.
>>
>>>    out:
>>>    
>>>          /*
>>> @@ -1225,6 +1236,9 @@ int ttm_bo_swapout(struct ttm_buffer_object
>>> *bo, struct ttm_operation_ctx *ctx,
>>>          if (locked)
>>>                  dma_resv_unlock(bo->base.resv);
>>>          ttm_bo_put(bo);
>>> +
>>> +       /* Don't break locking rules. */
>>> +       WARN_ON(ret == -EBUSY);
>>>          return ret;
>>>    }
>>>    
>>> diff --git a/drivers/gpu/drm/ttm/ttm_device.c
>>> b/drivers/gpu/drm/ttm/ttm_device.c
>>> index 460953dcad11..eaa7487ae404 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_device.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_device.c
>>> @@ -143,14 +143,12 @@ int ttm_device_swapout(struct ttm_device
>>> *bdev, struct ttm_operation_ctx *ctx,
>>>    
>>>                  for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
>>>                          list_for_each_entry(bo, &man->lru[j], lru)
>>> {
>>> -                               uint32_t num_pages;
>>> +                               pgoff_t num_pages;
>>>    
>>> -                               if (!bo->ttm ||
>>> -                                   bo->ttm->page_flags &
>>> TTM_PAGE_FLAG_SG ||
>>> -                                   bo->ttm->page_flags &
>>> TTM_PAGE_FLAG_SWAPPED)
>>> +                               if (!READ_ONCE(bo->ttm))
>>>                                          continue;
>>>    
>>> -                               num_pages = bo->ttm->num_pages;
>>> +                               num_pages = bo->base.size >>
>>> PAGE_SHIFT;
>>>                                  ret = ttm_bo_swapout(bo, ctx,
>>> gfp_flags);
>>>                                  /* ttm_bo_swapout has dropped the
>>> lru_lock */
>>>                                  if (!ret)
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-27 15:05       ` [Intel-gfx] " Thomas Hellström
@ 2021-05-27 15:32         ` Christian König
  -1 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-27 15:32 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 27.05.21 um 17:05 schrieb Thomas Hellström:
> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>> The swapping code was dereference bo->ttm pointers without having
>>>> the
>>>> dma-resv lock held. Also it might try to swap out unpopulated
>>>> bos.
>>>>
>>>> Fix this by moving the bo->ttm dereference until we have the
>>>> reservation
>>>> lock. Check that the ttm_tt is populated after the swap_notify
>>>> callback.
>>>>
>>>> Signed-off-by: Thomas Hellström
>>>> <thomas.hellstrom@linux.intel.com>
>>>> ---
>>>>    drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
>>>>    drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>    2 files changed, 18 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index 9f53506a82fc..86213d37657b 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>> ttm_buffer_object
>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>          if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
>>>> &locked, NULL))
>>>>                  return -EBUSY;
>>>>    
>>>> +       dma_resv_assert_held(bo->base.resv);
>>>> +
>>>> +       if (!bo->ttm ||
>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
>>>> +               if (locked)
>>>> +                       dma_resv_unlock(bo->base.resv);
>>>> +               return -EBUSY;
>>>> +       }
>>>> +
>>>>          if (!ttm_bo_get_unless_zero(bo)) {
>>>>                  if (locked)
>>>>                          dma_resv_unlock(bo->base.resv);
>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object
>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>          if (bo->bdev->funcs->swap_notify)
>>>>                  bo->bdev->funcs->swap_notify(bo);
>>>>    
>>>> -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>> gfp_flags);
>>> Exactly that is what I won't recommend. We would try to swap out
>>> the
>>> same BO over and over again with that.
>> But we wouldn't since the BO is taken off the LRU and never re-added,
>>
>>
> In fact, we'd probably might want to take the !bo->ttm bos off the LRU
> as well..

No, we don't want to take any BOs off the LRU unless they are pinned.

Adding a TT object or populating it doesn't necessarily put the BO back 
to the LRU.

Christian.

>
> /Thomas
>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-27 15:32         ` Christian König
  0 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-27 15:32 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 27.05.21 um 17:05 schrieb Thomas Hellström:
> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>> The swapping code was dereference bo->ttm pointers without having
>>>> the
>>>> dma-resv lock held. Also it might try to swap out unpopulated
>>>> bos.
>>>>
>>>> Fix this by moving the bo->ttm dereference until we have the
>>>> reservation
>>>> lock. Check that the ttm_tt is populated after the swap_notify
>>>> callback.
>>>>
>>>> Signed-off-by: Thomas Hellström
>>>> <thomas.hellstrom@linux.intel.com>
>>>> ---
>>>>    drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
>>>>    drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>    2 files changed, 18 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index 9f53506a82fc..86213d37657b 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>> ttm_buffer_object
>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>          if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
>>>> &locked, NULL))
>>>>                  return -EBUSY;
>>>>    
>>>> +       dma_resv_assert_held(bo->base.resv);
>>>> +
>>>> +       if (!bo->ttm ||
>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
>>>> +               if (locked)
>>>> +                       dma_resv_unlock(bo->base.resv);
>>>> +               return -EBUSY;
>>>> +       }
>>>> +
>>>>          if (!ttm_bo_get_unless_zero(bo)) {
>>>>                  if (locked)
>>>>                          dma_resv_unlock(bo->base.resv);
>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct ttm_buffer_object
>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>          if (bo->bdev->funcs->swap_notify)
>>>>                  bo->bdev->funcs->swap_notify(bo);
>>>>    
>>>> -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>> gfp_flags);
>>> Exactly that is what I won't recommend. We would try to swap out
>>> the
>>> same BO over and over again with that.
>> But we wouldn't since the BO is taken off the LRU and never re-added,
>>
>>
> In fact, we'd probably might want to take the !bo->ttm bos off the LRU
> as well..

No, we don't want to take any BOs off the LRU unless they are pinned.

Adding a TT object or populating it doesn't necessarily put the BO back 
to the LRU.

Christian.

>
> /Thomas
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-27 15:32         ` [Intel-gfx] " Christian König
@ 2021-05-27 15:51           ` Thomas Hellström
  -1 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-27 15:51 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel

On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
> > On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
> > > On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
> > > > Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> > > > > The swapping code was dereference bo->ttm pointers without
> > > > > having
> > > > > the
> > > > > dma-resv lock held. Also it might try to swap out unpopulated
> > > > > bos.
> > > > > 
> > > > > Fix this by moving the bo->ttm dereference until we have the
> > > > > reservation
> > > > > lock. Check that the ttm_tt is populated after the
> > > > > swap_notify
> > > > > callback.
> > > > > 
> > > > > Signed-off-by: Thomas Hellström
> > > > > <thomas.hellstrom@linux.intel.com>
> > > > > ---
> > > > >    drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
> > > > >    drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
> > > > >    2 files changed, 18 insertions(+), 6 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > b/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > index 9f53506a82fc..86213d37657b 100644
> > > > > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
> > > > > ttm_buffer_object
> > > > > *bo, struct ttm_operation_ctx *ctx,
> > > > >          if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
> > > > > &locked, NULL))
> > > > >                  return -EBUSY;
> > > > >    
> > > > > +       dma_resv_assert_held(bo->base.resv);
> > > > > +
> > > > > +       if (!bo->ttm ||
> > > > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> > > > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
> > > > > +               if (locked)
> > > > > +                       dma_resv_unlock(bo->base.resv);
> > > > > +               return -EBUSY;
> > > > > +       }
> > > > > +
> > > > >          if (!ttm_bo_get_unless_zero(bo)) {
> > > > >                  if (locked)
> > > > >                          dma_resv_unlock(bo->base.resv);
> > > > > @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
> > > > > ttm_buffer_object
> > > > > *bo, struct ttm_operation_ctx *ctx,
> > > > >          if (bo->bdev->funcs->swap_notify)
> > > > >                  bo->bdev->funcs->swap_notify(bo);
> > > > >    
> > > > > -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> > > > > +       if (ttm_tt_is_populated(bo->ttm))
> > > > > +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
> > > > > gfp_flags);
> > > > Exactly that is what I won't recommend. We would try to swap
> > > > out
> > > > the
> > > > same BO over and over again with that.
> > > But we wouldn't since the BO is taken off the LRU and never re-
> > > added,
> > > 
> > > 
> > In fact, we'd probably might want to take the !bo->ttm bos off the
> > LRU
> > as well..
> 
> No, we don't want to take any BOs of the LRU unless they are pinned.
> 
> Adding a TT object or populating it doesn't necessarily put the BO
> back 
> to the LRU.

OK, but swapped bos are also taken off the LRU list so these
unpopulated bos are just taking the same path. Only difference to
swapped is that they don't get read back on re-populate, but typically
cleared.

But what would be the point of keeping swapped-out bos on the LRU
list, particularly when we're iterating under a spinlock?
Shouldn't we try to re-add to LRU (if not already on an LRU) just
before populating? There aren't really that many calls in core TTM.

/Thomas





> 
> Christian.
> 
> > 
> > /Thomas
> > 
> 



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-27 15:51           ` Thomas Hellström
  0 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-27 15:51 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel

On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
> > On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
> > > On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
> > > > Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> > > > > The swapping code was dereference bo->ttm pointers without
> > > > > having
> > > > > the
> > > > > dma-resv lock held. Also it might try to swap out unpopulated
> > > > > bos.
> > > > > 
> > > > > Fix this by moving the bo->ttm dereference until we have the
> > > > > reservation
> > > > > lock. Check that the ttm_tt is populated after the
> > > > > swap_notify
> > > > > callback.
> > > > > 
> > > > > Signed-off-by: Thomas Hellström
> > > > > <thomas.hellstrom@linux.intel.com>
> > > > > ---
> > > > >    drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
> > > > >    drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
> > > > >    2 files changed, 18 insertions(+), 6 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > b/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > index 9f53506a82fc..86213d37657b 100644
> > > > > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
> > > > > ttm_buffer_object
> > > > > *bo, struct ttm_operation_ctx *ctx,
> > > > >          if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
> > > > > &locked, NULL))
> > > > >                  return -EBUSY;
> > > > >    
> > > > > +       dma_resv_assert_held(bo->base.resv);
> > > > > +
> > > > > +       if (!bo->ttm ||
> > > > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> > > > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
> > > > > +               if (locked)
> > > > > +                       dma_resv_unlock(bo->base.resv);
> > > > > +               return -EBUSY;
> > > > > +       }
> > > > > +
> > > > >          if (!ttm_bo_get_unless_zero(bo)) {
> > > > >                  if (locked)
> > > > >                          dma_resv_unlock(bo->base.resv);
> > > > > @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
> > > > > ttm_buffer_object
> > > > > *bo, struct ttm_operation_ctx *ctx,
> > > > >          if (bo->bdev->funcs->swap_notify)
> > > > >                  bo->bdev->funcs->swap_notify(bo);
> > > > >    
> > > > > -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
> > > > > +       if (ttm_tt_is_populated(bo->ttm))
> > > > > +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
> > > > > gfp_flags);
> > > > Exactly that is what I won't recommend. We would try to swap
> > > > out
> > > > the
> > > > same BO over and over again with that.
> > > But we wouldn't since the BO is taken off the LRU and never re-
> > > added,
> > > 
> > > 
> > In fact, we'd probably might want to take the !bo->ttm bos off the
> > LRU
> > as well..
> 
> No, we don't want to take any BOs of the LRU unless they are pinned.
> 
> Adding a TT object or populating it doesn't necessarily put the BO
> back 
> to the LRU.

OK, but swapped bos are also taken off the LRU list so these
unpopulated bos are just taking the same path. Only difference to
swapped is that they don't get read back on re-populate, but typically
cleared.

But what would be the point of keeping swapped-out bos on the LRU
list, particularly when we're iterating under a spinlock?
Shouldn't we try to re-add to LRU (if not already on an LRU) just
before populating? There aren't really that many calls in core TTM.

/Thomas





> 
> Christian.
> 
> > 
> > /Thomas
> > 
> 


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [Intel-gfx] ✗ Fi.CI.BUILD: failure for drm/ttm: Fix swapping dereferences of freed memory
  2021-05-27 14:19 ` [Intel-gfx] " Thomas Hellström
  (?)
  (?)
@ 2021-05-27 19:58 ` Patchwork
  -1 siblings, 0 replies; 25+ messages in thread
From: Patchwork @ 2021-05-27 19:58 UTC (permalink / raw)
  To: Thomas Hellström; +Cc: intel-gfx

== Series Details ==

Series: drm/ttm: Fix swapping dereferences of freed memory
URL   : https://patchwork.freedesktop.org/series/90673/
State : failure

== Summary ==

Applying: drm/ttm: Fix swapping dereferences of freed memory
error: sha1 information is lacking or useless (drivers/gpu/drm/ttm/ttm_bo.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 drm/ttm: Fix swapping dereferences of freed memory
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-27 15:51           ` [Intel-gfx] " Thomas Hellström
@ 2021-05-28  7:16             ` Christian König
  -1 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-28  7:16 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 27.05.21 um 17:51 schrieb Thomas Hellström:
> On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
>> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
>>> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>>>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>>>> The swapping code was dereference bo->ttm pointers without
>>>>>> having
>>>>>> the
>>>>>> dma-resv lock held. Also it might try to swap out unpopulated
>>>>>> bos.
>>>>>>
>>>>>> Fix this by moving the bo->ttm dereference until we have the
>>>>>> reservation
>>>>>> lock. Check that the ttm_tt is populated after the
>>>>>> swap_notify
>>>>>> callback.
>>>>>>
>>>>>> Signed-off-by: Thomas Hellström
>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
>>>>>>     drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>>>     2 files changed, 18 insertions(+), 6 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> index 9f53506a82fc..86213d37657b 100644
>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>>>> ttm_buffer_object
>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>           if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
>>>>>> &locked, NULL))
>>>>>>                   return -EBUSY;
>>>>>>     
>>>>>> +       dma_resv_assert_held(bo->base.resv);
>>>>>> +
>>>>>> +       if (!bo->ttm ||
>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
>>>>>> +               if (locked)
>>>>>> +                       dma_resv_unlock(bo->base.resv);
>>>>>> +               return -EBUSY;
>>>>>> +       }
>>>>>> +
>>>>>>           if (!ttm_bo_get_unless_zero(bo)) {
>>>>>>                   if (locked)
>>>>>>                           dma_resv_unlock(bo->base.resv);
>>>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
>>>>>> ttm_buffer_object
>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>           if (bo->bdev->funcs->swap_notify)
>>>>>>                   bo->bdev->funcs->swap_notify(bo);
>>>>>>     
>>>>>> -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>>>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>> gfp_flags);
>>>>> Exactly that is what I won't recommend. We would try to swap
>>>>> out
>>>>> the
>>>>> same BO over and over again with that.
>>>> But we wouldn't since the BO is taken off the LRU and never re-
>>>> added,
>>>>
>>>>
>>> In fact, we'd probably might want to take the !bo->ttm bos off the
>>> LRU
>>> as well..
>> No, we don't want to take any BOs of the LRU unless they are pinned.
>>
>> Adding a TT object or populating it doesn't necessarily put the BO
>> back
>> to the LRU.
> OK, but swapped bos are also taken off the LRU list so these
> unpopulated bos are just taking the same path. Only difference to
> swapped is that they don't get read back on re-populate, but typically
> cleared.
>
> But what would be the point of keeping swapped-out bos on the LRU
> list?, particularly when we're iterating under a spinlock?
> Shouldn't we try to re-add to LRU (if not already on an LRU) just
> before populating? There aren't really that many calls in core TTM.

I want to avoid removing BOs from the LRU as much as possible since we 
forgot in multiple places to re-add them.

Conceptually, I think the swapped BOs should have a separate memory domain; 
this way we can ignore them cleanly when swapping things out.

Going to pick this patch up, modifying it a bit more and then pushing it 
to drm-misc-fixes for upstreaming.

Thanks,
Christian.

>
> /Thomas
>
>
>
>
>
>> Christian.
>>
>>> /Thomas
>>>
>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-28  7:16             ` Christian König
  0 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-28  7:16 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 27.05.21 um 17:51 schrieb Thomas Hellström:
> On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
>> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
>>> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>>>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>>>> The swapping code was dereference bo->ttm pointers without
>>>>>> having
>>>>>> the
>>>>>> dma-resv lock held. Also it might try to swap out unpopulated
>>>>>> bos.
>>>>>>
>>>>>> Fix this by moving the bo->ttm dereference until we have the
>>>>>> reservation
>>>>>> lock. Check that the ttm_tt is populated after the
>>>>>> swap_notify
>>>>>> callback.
>>>>>>
>>>>>> Signed-off-by: Thomas Hellström
>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>> ---
>>>>>>     drivers/gpu/drm/ttm/ttm_bo.c     | 16 +++++++++++++++-
>>>>>>     drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>>>     2 files changed, 18 insertions(+), 6 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> index 9f53506a82fc..86213d37657b 100644
>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>>>> ttm_buffer_object
>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>           if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place,
>>>>>> &locked, NULL))
>>>>>>                   return -EBUSY;
>>>>>>     
>>>>>> +       dma_resv_assert_held(bo->base.resv);
>>>>>> +
>>>>>> +       if (!bo->ttm ||
>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) {
>>>>>> +               if (locked)
>>>>>> +                       dma_resv_unlock(bo->base.resv);
>>>>>> +               return -EBUSY;
>>>>>> +       }
>>>>>> +
>>>>>>           if (!ttm_bo_get_unless_zero(bo)) {
>>>>>>                   if (locked)
>>>>>>                           dma_resv_unlock(bo->base.resv);
>>>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
>>>>>> ttm_buffer_object
>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>           if (bo->bdev->funcs->swap_notify)
>>>>>>                   bo->bdev->funcs->swap_notify(bo);
>>>>>>     
>>>>>> -       ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags);
>>>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>> gfp_flags);
>>>>> Exactly that is what I won't recommend. We would try to swap
>>>>> out
>>>>> the
>>>>> same BO over and over again with that.
>>>> But we wouldn't since the BO is taken off the LRU and never re-
>>>> added,
>>>>
>>>>
>>> In fact, we'd probably might want to take the !bo->ttm bos off the
>>> LRU
>>> as well..
>> No, we don't want to take any BOs of the LRU unless they are pinned.
>>
>> Adding a TT object or populating it doesn't necessarily put the BO
>> back
>> to the LRU.
> OK, but swapped bos are also taken off the LRU list so these
> unpopulated bos are just taking the same path. Only difference to
> swapped is that they don't get read back on re-populate, but typically
> cleared.
>
> But what would be the point of keeping swapped-out bos on the LRU
> list?, particularly when we're iterating under a spinlock?
> Shouldn't we try to re-add to LRU (if not already on an LRU) just
> before populating? There aren't really that many calls in core TTM.

I want to avoid removing BOs from the LRU as much as possible since we 
forgot in multiple places to re-add them.

Conceptually, I think the swapped BOs should have a separate memory domain; 
this way we can ignore them cleanly when swapping things out.

Going to pick this patch up, modifying it a bit more and then pushing it 
to drm-misc-fixes for upstreaming.

Thanks,
Christian.

>
> /Thomas
>
>
>
>
>
>> Christian.
>>
>>> /Thomas
>>>
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-28  7:16             ` [Intel-gfx] " Christian König
@ 2021-05-28  7:33               ` Thomas Hellström
  -1 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-28  7:33 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel

On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
> Am 27.05.21 um 17:51 schrieb Thomas Hellström:
> > On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
> > > Am 27.05.21 um 17:05 schrieb Thomas Hellström:
> > > > On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
> > > > > On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
> > > > > > Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> > > > > > > The swapping code was dereference bo->ttm pointers
> > > > > > > without
> > > > > > > having
> > > > > > > the
> > > > > > > dma-resv lock held. Also it might try to swap out
> > > > > > > unpopulated
> > > > > > > bos.
> > > > > > > 
> > > > > > > Fix this by moving the bo->ttm dereference until we have
> > > > > > > the
> > > > > > > reservation
> > > > > > > lock. Check that the ttm_tt is populated after the
> > > > > > > swap_notify
> > > > > > > callback.
> > > > > > > 
> > > > > > > Signed-off-by: Thomas Hellström
> > > > > > > <thomas.hellstrom@linux.intel.com>
> > > > > > > ---
> > > > > > >     drivers/gpu/drm/ttm/ttm_bo.c     | 16
> > > > > > > +++++++++++++++-
> > > > > > >     drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
> > > > > > >     2 files changed, 18 insertions(+), 6 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > > > b/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > > > index 9f53506a82fc..86213d37657b 100644
> > > > > > > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > > > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > > > @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
> > > > > > > ttm_buffer_object
> > > > > > > *bo, struct ttm_operation_ctx *ctx,
> > > > > > >           if (!ttm_bo_evict_swapout_allowable(bo, ctx,
> > > > > > > &place,
> > > > > > > &locked, NULL))
> > > > > > >                   return -EBUSY;
> > > > > > >     
> > > > > > > +       dma_resv_assert_held(bo->base.resv);
> > > > > > > +
> > > > > > > +       if (!bo->ttm ||
> > > > > > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> > > > > > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
> > > > > > > {
> > > > > > > +               if (locked)
> > > > > > > +                       dma_resv_unlock(bo->base.resv);
> > > > > > > +               return -EBUSY;
> > > > > > > +       }
> > > > > > > +
> > > > > > >           if (!ttm_bo_get_unless_zero(bo)) {
> > > > > > >                   if (locked)
> > > > > > >                           dma_resv_unlock(bo->base.resv);
> > > > > > > @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
> > > > > > > ttm_buffer_object
> > > > > > > *bo, struct ttm_operation_ctx *ctx,
> > > > > > >           if (bo->bdev->funcs->swap_notify)
> > > > > > >                   bo->bdev->funcs->swap_notify(bo);
> > > > > > >     
> > > > > > > -       ret = ttm_tt_swapout(bo->bdev, bo->ttm,
> > > > > > > gfp_flags);
> > > > > > > +       if (ttm_tt_is_populated(bo->ttm))
> > > > > > > +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
> > > > > > > gfp_flags);
> > > > > > Exactly that is what I won't recommend. We would try to
> > > > > > swap
> > > > > > out
> > > > > > the
> > > > > > same BO over and over again with that.
> > > > > But we wouldn't since the BO is taken off the LRU and never
> > > > > re-
> > > > > added,
> > > > > 
> > > > > 
> > > > In fact, we'd probably might want to take the !bo->ttm bos off
> > > > the
> > > > LRU
> > > > as well..
> > > No, we don't want to take any BOs of the LRU unless they are
> > > pinned.
> > > 
> > > Adding a TT object or populating it doesn't necessarily put the
> > > BO
> > > back
> > > to the LRU.
> > OK, but swapped bos are also taken off the LRU list so these
> > unpopulated bos are just taking the same path. Only difference to
> > swapped is that they don't get read back on re-populate, but
> > typically
> > cleared.
> > 
> > But what would be the point of keeping swapped-out bos on the LRU
> > list?, particularly when we're iterating under a spinlock?
> > Shouldn't we try to re-add to LRU (if not already on an LRU) just
> > before populating? There aren't really that many calls in core TTM.
> 
> I want to avoid removing BOs from the LRU as much as possible since
> we 
> forgot on multiple places that we want to re-add them.
> 
> Conceptual I think the swapped BOs should have a separate memory
> domain, 
> this way we can ignore them cleanly when swapping things out.

Yes, that would of course work as well. Keeping them on the system LRU
is IMO highly undesirable.

> 
> Going to pick this patch up, modifying it a bit more and then pushing
> it 
> to drm-misc-fixes for upstreaming.

OK, I dropped the TTM fix for the purge-in-swap-notify from the i915
series, hoping that the reworked variant of this patch lands first.

Thanks,
Thomas

> 
> Thanks,
> Christian.
> 
> > 
> > /Thomas
> > 
> > 
> > 
> > 
> > 
> > > Christian.
> > > 
> > > > /Thomas
> > > > 
> > 
> 



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-28  7:33               ` Thomas Hellström
  0 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-28  7:33 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel

On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
> Am 27.05.21 um 17:51 schrieb Thomas Hellström:
> > On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
> > > Am 27.05.21 um 17:05 schrieb Thomas Hellström:
> > > > On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
> > > > > On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
> > > > > > Am 27.05.21 um 16:19 schrieb Thomas Hellström:
> > > > > > > The swapping code was dereference bo->ttm pointers
> > > > > > > without
> > > > > > > having
> > > > > > > the
> > > > > > > dma-resv lock held. Also it might try to swap out
> > > > > > > unpopulated
> > > > > > > bos.
> > > > > > > 
> > > > > > > Fix this by moving the bo->ttm dereference until we have
> > > > > > > the
> > > > > > > reservation
> > > > > > > lock. Check that the ttm_tt is populated after the
> > > > > > > swap_notify
> > > > > > > callback.
> > > > > > > 
> > > > > > > Signed-off-by: Thomas Hellström
> > > > > > > <thomas.hellstrom@linux.intel.com>
> > > > > > > ---
> > > > > > >     drivers/gpu/drm/ttm/ttm_bo.c     | 16
> > > > > > > +++++++++++++++-
> > > > > > >     drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
> > > > > > >     2 files changed, 18 insertions(+), 6 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > > > b/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > > > index 9f53506a82fc..86213d37657b 100644
> > > > > > > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > > > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > > > > > > @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
> > > > > > > ttm_buffer_object
> > > > > > > *bo, struct ttm_operation_ctx *ctx,
> > > > > > >           if (!ttm_bo_evict_swapout_allowable(bo, ctx,
> > > > > > > &place,
> > > > > > > &locked, NULL))
> > > > > > >                   return -EBUSY;
> > > > > > >     
> > > > > > > +       dma_resv_assert_held(bo->base.resv);
> > > > > > > +
> > > > > > > +       if (!bo->ttm ||
> > > > > > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
> > > > > > > +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
> > > > > > > {
> > > > > > > +               if (locked)
> > > > > > > +                       dma_resv_unlock(bo->base.resv);
> > > > > > > +               return -EBUSY;
> > > > > > > +       }
> > > > > > > +
> > > > > > >           if (!ttm_bo_get_unless_zero(bo)) {
> > > > > > >                   if (locked)
> > > > > > >                           dma_resv_unlock(bo->base.resv);
> > > > > > > @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
> > > > > > > ttm_buffer_object
> > > > > > > *bo, struct ttm_operation_ctx *ctx,
> > > > > > >           if (bo->bdev->funcs->swap_notify)
> > > > > > >                   bo->bdev->funcs->swap_notify(bo);
> > > > > > >     
> > > > > > > -       ret = ttm_tt_swapout(bo->bdev, bo->ttm,
> > > > > > > gfp_flags);
> > > > > > > +       if (ttm_tt_is_populated(bo->ttm))
> > > > > > > +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
> > > > > > > gfp_flags);
> > > > > > Exactly that is what I won't recommend. We would try to
> > > > > > swap
> > > > > > out
> > > > > > the
> > > > > > same BO over and over again with that.
> > > > > But we wouldn't since the BO is taken off the LRU and never
> > > > > re-
> > > > > added,
> > > > > 
> > > > > 
> > > > In fact, we'd probably might want to take the !bo->ttm bos off
> > > > the
> > > > LRU
> > > > as well..
> > > No, we don't want to take any BOs of the LRU unless they are
> > > pinned.
> > > 
> > > Adding a TT object or populating it doesn't necessarily put the
> > > BO
> > > back
> > > to the LRU.
> > OK, but swapped bos are also taken off the LRU list so these
> > unpopulated bos are just taking the same path. Only difference to
> > swapped is that they don't get read back on re-populate, but
> > typically
> > cleared.
> > 
> > But what would be the point of keeping swapped-out bos on the LRU
> > list?, particularly when we're iterating under a spinlock?
> > Shouldn't we try to re-add to LRU (if not already on an LRU) just
> > before populating? There aren't really that many calls in core TTM.
> 
> I want to avoid removing BOs from the LRU as much as possible since
> we 
> forgot on multiple places that we want to re-add them.
> 
> Conceptual I think the swapped BOs should have a separate memory
> domain, 
> this way we can ignore them cleanly when swapping things out.

Yes, that would of course work as well. Keeping them on the system LRU
is IMO highly undesirable.

> 
> Going to pick this patch up, modifying it a bit more and then pushing
> it 
> to drm-misc-fixes for upstreaming.

OK, I dropped the TTM fix for the purge-in-swap-notify from the i915
series, hoping that the reworked variant of this patch lands first.

Thanks,
Thomas

> 
> Thanks,
> Christian.
> 
> > 
> > /Thomas
> > 
> > 
> > 
> > 
> > 
> > > Christian.
> > > 
> > > > /Thomas
> > > > 
> > 
> 


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-28  7:33               ` [Intel-gfx] " Thomas Hellström
@ 2021-05-28 14:10                 ` Christian König
  -1 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-28 14:10 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 28.05.21 um 09:33 schrieb Thomas Hellström:
> On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
>> Am 27.05.21 um 17:51 schrieb Thomas Hellström:
>>> On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
>>>> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
>>>>> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>>>>>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>>>>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>>>>>> The swapping code was dereference bo->ttm pointers
>>>>>>>> without
>>>>>>>> having
>>>>>>>> the
>>>>>>>> dma-resv lock held. Also it might try to swap out
>>>>>>>> unpopulated
>>>>>>>> bos.
>>>>>>>>
>>>>>>>> Fix this by moving the bo->ttm dereference until we have
>>>>>>>> the
>>>>>>>> reservation
>>>>>>>> lock. Check that the ttm_tt is populated after the
>>>>>>>> swap_notify
>>>>>>>> callback.
>>>>>>>>
>>>>>>>> Signed-off-by: Thomas Hellström
>>>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>>>> ---
>>>>>>>>      drivers/gpu/drm/ttm/ttm_bo.c     | 16
>>>>>>>> +++++++++++++++-
>>>>>>>>      drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>>>>>      2 files changed, 18 insertions(+), 6 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> index 9f53506a82fc..86213d37657b 100644
>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>>>>>> ttm_buffer_object
>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>            if (!ttm_bo_evict_swapout_allowable(bo, ctx,
>>>>>>>> &place,
>>>>>>>> &locked, NULL))
>>>>>>>>                    return -EBUSY;
>>>>>>>>      
>>>>>>>> +       dma_resv_assert_held(bo->base.resv);
>>>>>>>> +
>>>>>>>> +       if (!bo->ttm ||
>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
>>>>>>>> {
>>>>>>>> +               if (locked)
>>>>>>>> +                       dma_resv_unlock(bo->base.resv);
>>>>>>>> +               return -EBUSY;
>>>>>>>> +       }
>>>>>>>> +
>>>>>>>>            if (!ttm_bo_get_unless_zero(bo)) {
>>>>>>>>                    if (locked)
>>>>>>>>                            dma_resv_unlock(bo->base.resv);
>>>>>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
>>>>>>>> ttm_buffer_object
>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>            if (bo->bdev->funcs->swap_notify)
>>>>>>>>                    bo->bdev->funcs->swap_notify(bo);
>>>>>>>>      
>>>>>>>> -       ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>> gfp_flags);
>>>>>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>>>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>> gfp_flags);
>>>>>>> Exactly that is what I won't recommend. We would try to
>>>>>>> swap
>>>>>>> out
>>>>>>> the
>>>>>>> same BO over and over again with that.
>>>>>> But we wouldn't since the BO is taken off the LRU and never
>>>>>> re-
>>>>>> added,
>>>>>>
>>>>>>
>>>>> In fact, we'd probably might want to take the !bo->ttm bos off
>>>>> the
>>>>> LRU
>>>>> as well..
>>>> No, we don't want to take any BOs of the LRU unless they are
>>>> pinned.
>>>>
>>>> Adding a TT object or populating it doesn't necessarily put the
>>>> BO
>>>> back
>>>> to the LRU.
>>> OK, but swapped bos are also taken off the LRU list so these
>>> unpopulated bos are just taking the same path. Only difference to
>>> swapped is that they don't get read back on re-populate, but
>>> typically
>>> cleared.
>>>
>>> But what would be the point of keeping swapped-out bos on the LRU
>>> list?, particularly when we're iterating under a spinlock?
>>> Shouldn't we try to re-add to LRU (if not already on an LRU) just
>>> before populating? There aren't really that many calls in core TTM.
>> I want to avoid removing BOs from the LRU as much as possible since
>> we
>> forgot on multiple places that we want to re-add them.
>>
>> Conceptual I think the swapped BOs should have a separate memory
>> domain,
>> this way we can ignore them cleanly when swapping things out.
> Yes, that would of course work as well. Keeping them on the system LRU
> is IMO highly undesirable.
>
>> Going to pick this patch up, modifying it a bit more and then pushing
>> it
>> to drm-misc-fixes for upstreaming.
> OK, I dropped the TTM fix for the purge-in-swap-notify from the i915
> series, hoping that the reworked variant of this patch lands first.

You will still need to add the second ttm_tt_is_populated() check, since I
dropped that from the version which I want to push to -fixes.

Regards,
Christian.

>
> Thanks,
> Thomas
>
>> Thanks,
>> Christian.
>>
>>> /Thomas
>>>
>>>
>>>
>>>
>>>
>>>> Christian.
>>>>
>>>>> /Thomas
>>>>>
>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-28 14:10                 ` Christian König
  0 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-28 14:10 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel

Am 28.05.21 um 09:33 schrieb Thomas Hellström:
> On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
>> Am 27.05.21 um 17:51 schrieb Thomas Hellström:
>>> On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
>>>> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
>>>>> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>>>>>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>>>>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>>>>>> The swapping code was dereference bo->ttm pointers
>>>>>>>> without
>>>>>>>> having
>>>>>>>> the
>>>>>>>> dma-resv lock held. Also it might try to swap out
>>>>>>>> unpopulated
>>>>>>>> bos.
>>>>>>>>
>>>>>>>> Fix this by moving the bo->ttm dereference until we have
>>>>>>>> the
>>>>>>>> reservation
>>>>>>>> lock. Check that the ttm_tt is populated after the
>>>>>>>> swap_notify
>>>>>>>> callback.
>>>>>>>>
>>>>>>>> Signed-off-by: Thomas Hellström
>>>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>>>> ---
>>>>>>>>      drivers/gpu/drm/ttm/ttm_bo.c     | 16
>>>>>>>> +++++++++++++++-
>>>>>>>>      drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>>>>>      2 files changed, 18 insertions(+), 6 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> index 9f53506a82fc..86213d37657b 100644
>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>>>>>> ttm_buffer_object
>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>            if (!ttm_bo_evict_swapout_allowable(bo, ctx,
>>>>>>>> &place,
>>>>>>>> &locked, NULL))
>>>>>>>>                    return -EBUSY;
>>>>>>>>      
>>>>>>>> +       dma_resv_assert_held(bo->base.resv);
>>>>>>>> +
>>>>>>>> +       if (!bo->ttm ||
>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
>>>>>>>> {
>>>>>>>> +               if (locked)
>>>>>>>> +                       dma_resv_unlock(bo->base.resv);
>>>>>>>> +               return -EBUSY;
>>>>>>>> +       }
>>>>>>>> +
>>>>>>>>            if (!ttm_bo_get_unless_zero(bo)) {
>>>>>>>>                    if (locked)
>>>>>>>>                            dma_resv_unlock(bo->base.resv);
>>>>>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
>>>>>>>> ttm_buffer_object
>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>            if (bo->bdev->funcs->swap_notify)
>>>>>>>>                    bo->bdev->funcs->swap_notify(bo);
>>>>>>>>      
>>>>>>>> -       ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>> gfp_flags);
>>>>>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>>>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>> gfp_flags);
>>>>>>> Exactly that is what I won't recommend. We would try to
>>>>>>> swap
>>>>>>> out
>>>>>>> the
>>>>>>> same BO over and over again with that.
>>>>>> But we wouldn't since the BO is taken off the LRU and never
>>>>>> re-
>>>>>> added,
>>>>>>
>>>>>>
>>>>> In fact, we'd probably might want to take the !bo->ttm bos off
>>>>> the
>>>>> LRU
>>>>> as well..
>>>> No, we don't want to take any BOs of the LRU unless they are
>>>> pinned.
>>>>
>>>> Adding a TT object or populating it doesn't necessarily put the
>>>> BO
>>>> back
>>>> to the LRU.
>>> OK, but swapped bos are also taken off the LRU list so these
>>> unpopulated bos are just taking the same path. Only difference to
>>> swapped is that they don't get read back on re-populate, but
>>> typically
>>> cleared.
>>>
>>> But what would be the point of keeping swapped-out bos on the LRU
>>> list?, particularly when we're iterating under a spinlock?
>>> Shouldn't we try to re-add to LRU (if not already on an LRU) just
>>> before populating? There aren't really that many calls in core TTM.
>> I want to avoid removing BOs from the LRU as much as possible since
>> we
>> forgot on multiple places that we want to re-add them.
>>
>> Conceptual I think the swapped BOs should have a separate memory
>> domain,
>> this way we can ignore them cleanly when swapping things out.
> Yes, that would of course work as well. Keeping them on the system LRU
> is IMO highly undesirable.
>
>> Going to pick this patch up, modifying it a bit more and then pushing
>> it
>> to drm-misc-fixes for upstreaming.
> OK, I dropped the TTM fix for the purge-in-swap-notify from the i915
> series, hoping that the reworked variant of this patch lands first.

You will still need to add the second ttm_tt_is_populated() check, since I
dropped that from the version which I want to push to -fixes.

Regards,
Christian.

>
> Thanks,
> Thomas
>
>> Thanks,
>> Christian.
>>
>>> /Thomas
>>>
>>>
>>>
>>>
>>>
>>>> Christian.
>>>>
>>>>> /Thomas
>>>>>
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-28 14:10                 ` [Intel-gfx] " Christian König
@ 2021-05-28 14:17                   ` Thomas Hellström
  -1 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-28 14:17 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel


On 5/28/21 4:10 PM, Christian König wrote:
> Am 28.05.21 um 09:33 schrieb Thomas Hellström:
>> On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
>>> Am 27.05.21 um 17:51 schrieb Thomas Hellström:
>>>> On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
>>>>> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
>>>>>> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>>>>>>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>>>>>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>>>>>>> The swapping code was dereference bo->ttm pointers
>>>>>>>>> without
>>>>>>>>> having
>>>>>>>>> the
>>>>>>>>> dma-resv lock held. Also it might try to swap out
>>>>>>>>> unpopulated
>>>>>>>>> bos.
>>>>>>>>>
>>>>>>>>> Fix this by moving the bo->ttm dereference until we have
>>>>>>>>> the
>>>>>>>>> reservation
>>>>>>>>> lock. Check that the ttm_tt is populated after the
>>>>>>>>> swap_notify
>>>>>>>>> callback.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Thomas Hellström
>>>>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>>>>> ---
>>>>>>>>>      drivers/gpu/drm/ttm/ttm_bo.c     | 16
>>>>>>>>> +++++++++++++++-
>>>>>>>>>      drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>>>>>>      2 files changed, 18 insertions(+), 6 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>> index 9f53506a82fc..86213d37657b 100644
>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>>>>>>> ttm_buffer_object
>>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>>            if (!ttm_bo_evict_swapout_allowable(bo, ctx,
>>>>>>>>> &place,
>>>>>>>>> &locked, NULL))
>>>>>>>>>                    return -EBUSY;
>>>>>>>>> +       dma_resv_assert_held(bo->base.resv);
>>>>>>>>> +
>>>>>>>>> +       if (!bo->ttm ||
>>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
>>>>>>>>> {
>>>>>>>>> +               if (locked)
>>>>>>>>> +                       dma_resv_unlock(bo->base.resv);
>>>>>>>>> +               return -EBUSY;
>>>>>>>>> +       }
>>>>>>>>> +
>>>>>>>>>            if (!ttm_bo_get_unless_zero(bo)) {
>>>>>>>>>                    if (locked)
>>>>>>>>>                            dma_resv_unlock(bo->base.resv);
>>>>>>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
>>>>>>>>> ttm_buffer_object
>>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>>            if (bo->bdev->funcs->swap_notify)
>>>>>>>>>                    bo->bdev->funcs->swap_notify(bo);
>>>>>>>>>      -       ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>>> gfp_flags);
>>>>>>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>>>>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>>> gfp_flags);
>>>>>>>> Exactly that is what I won't recommend. We would try to
>>>>>>>> swap
>>>>>>>> out
>>>>>>>> the
>>>>>>>> same BO over and over again with that.
>>>>>>> But we wouldn't since the BO is taken off the LRU and never
>>>>>>> re-
>>>>>>> added,
>>>>>>>
>>>>>>>
>>>>>> In fact, we'd probably might want to take the !bo->ttm bos off
>>>>>> the
>>>>>> LRU
>>>>>> as well..
>>>>> No, we don't want to take any BOs of the LRU unless they are
>>>>> pinned.
>>>>>
>>>>> Adding a TT object or populating it doesn't necessarily put the
>>>>> BO
>>>>> back
>>>>> to the LRU.
>>>> OK, but swapped bos are also taken off the LRU list so these
>>>> unpopulated bos are just taking the same path. Only difference to
>>>> swapped is that they don't get read back on re-populate, but
>>>> typically
>>>> cleared.
>>>>
>>>> But what would be the point of keeping swapped-out bos on the LRU
>>>> list?, particularly when we're iterating under a spinlock?
>>>> Shouldn't we try to re-add to LRU (if not already on an LRU) just
>>>> before populating? There aren't really that many calls in core TTM.
>>> I want to avoid removing BOs from the LRU as much as possible since
>>> we
>>> forgot on multiple places that we want to re-add them.
>>>
>>> Conceptual I think the swapped BOs should have a separate memory
>>> domain,
>>> this way we can ignore them cleanly when swapping things out.
>> Yes, that would of course work as well. Keeping them on the system LRU
>> is IMO highly undesirable.
>>
>>> Going to pick this patch up, modifying it a bit more and then pushing
>>> it
>>> to drm-misc-fixes for upstreaming.
>> OK, I dropped the TTM fix for the purge-in-swap-notify from the i915
>> series, hoping that the reworked variant of this patch lands first.
>
> You will still need to add the second ttm_tt_populated() check since I 
> dropped that for the back which I want to push to -fixes.
>
> Regards,
> Christian.
>
OK, great. Then you have my S-O-B on this patch.

BTW that original patch that added the ttm_tt_is_populated() was 
considered "LGTM" by you, except for this ttm_tt_is_populated(). So do I 
have an Acked-by: on that now?

That is

https://patchwork.freedesktop.org/patch/435833/?series=90681&rev=2

plus the check added?

Thanks,

Thomas




>>
>> Thanks,
>> Thomas
>>
>>> Thanks,
>>> Christian.
>>>
>>>> /Thomas
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>> Christian.
>>>>>
>>>>>> /Thomas
>>>>>>
>>
>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-28 14:17                   ` Thomas Hellström
  0 siblings, 0 replies; 25+ messages in thread
From: Thomas Hellström @ 2021-05-28 14:17 UTC (permalink / raw)
  To: Christian König, intel-gfx, dri-devel


On 5/28/21 4:10 PM, Christian König wrote:
> Am 28.05.21 um 09:33 schrieb Thomas Hellström:
>> On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
>>> Am 27.05.21 um 17:51 schrieb Thomas Hellström:
>>>> On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
>>>>> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
>>>>>> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>>>>>>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>>>>>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>>>>>>> The swapping code was dereference bo->ttm pointers
>>>>>>>>> without
>>>>>>>>> having
>>>>>>>>> the
>>>>>>>>> dma-resv lock held. Also it might try to swap out
>>>>>>>>> unpopulated
>>>>>>>>> bos.
>>>>>>>>>
>>>>>>>>> Fix this by moving the bo->ttm dereference until we have
>>>>>>>>> the
>>>>>>>>> reservation
>>>>>>>>> lock. Check that the ttm_tt is populated after the
>>>>>>>>> swap_notify
>>>>>>>>> callback.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Thomas Hellström
>>>>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>>>>> ---
>>>>>>>>>      drivers/gpu/drm/ttm/ttm_bo.c     | 16
>>>>>>>>> +++++++++++++++-
>>>>>>>>>      drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>>>>>>      2 files changed, 18 insertions(+), 6 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>> index 9f53506a82fc..86213d37657b 100644
>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>>>>>>> ttm_buffer_object
>>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>>            if (!ttm_bo_evict_swapout_allowable(bo, ctx,
>>>>>>>>> &place,
>>>>>>>>> &locked, NULL))
>>>>>>>>>                    return -EBUSY;
>>>>>>>>> +       dma_resv_assert_held(bo->base.resv);
>>>>>>>>> +
>>>>>>>>> +       if (!bo->ttm ||
>>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
>>>>>>>>> {
>>>>>>>>> +               if (locked)
>>>>>>>>> +                       dma_resv_unlock(bo->base.resv);
>>>>>>>>> +               return -EBUSY;
>>>>>>>>> +       }
>>>>>>>>> +
>>>>>>>>>            if (!ttm_bo_get_unless_zero(bo)) {
>>>>>>>>>                    if (locked)
>>>>>>>>>                            dma_resv_unlock(bo->base.resv);
>>>>>>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
>>>>>>>>> ttm_buffer_object
>>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>>            if (bo->bdev->funcs->swap_notify)
>>>>>>>>>                    bo->bdev->funcs->swap_notify(bo);
>>>>>>>>>      -       ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>>> gfp_flags);
>>>>>>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>>>>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>>> gfp_flags);
>>>>>>>> Exactly that is what I won't recommend. We would try to
>>>>>>>> swap
>>>>>>>> out
>>>>>>>> the
>>>>>>>> same BO over and over again with that.
>>>>>>> But we wouldn't since the BO is taken off the LRU and never
>>>>>>> re-
>>>>>>> added,
>>>>>>>
>>>>>>>
>>>>>> In fact, we'd probably might want to take the !bo->ttm bos off
>>>>>> the
>>>>>> LRU
>>>>>> as well..
>>>>> No, we don't want to take any BOs of the LRU unless they are
>>>>> pinned.
>>>>>
>>>>> Adding a TT object or populating it doesn't necessarily put the
>>>>> BO
>>>>> back
>>>>> to the LRU.
>>>> OK, but swapped bos are also taken off the LRU list so these
>>>> unpopulated bos are just taking the same path. Only difference to
>>>> swapped is that they don't get read back on re-populate, but
>>>> typically
>>>> cleared.
>>>>
>>>> But what would be the point of keeping swapped-out bos on the LRU
>>>> list?, particularly when we're iterating under a spinlock?
>>>> Shouldn't we try to re-add to LRU (if not already on an LRU) just
>>>> before populating? There aren't really that many calls in core TTM.
>>> I want to avoid removing BOs from the LRU as much as possible since
>>> we
>>> forgot on multiple places that we want to re-add them.
>>>
>>> Conceptual I think the swapped BOs should have a separate memory
>>> domain,
>>> this way we can ignore them cleanly when swapping things out.
>> Yes, that would of course work as well. Keeping them on the system LRU
>> is IMO highly undesirable.
>>
>>> Going to pick this patch up, modifying it a bit more and then pushing
>>> it
>>> to drm-misc-fixes for upstreaming.
>> OK, I dropped the TTM fix for the purge-in-swap-notify from the i915
>> series, hoping that the reworked variant of this patch lands first.
>
> You will still need to add the second ttm_tt_populated() check since I 
> dropped that for the back which I want to push to -fixes.
>
> Regards,
> Christian.
>
OK, great. Then you have my S-O-B on this patch.

BTW that original patch that added the ttm_tt_is_populated() was 
considered "LGTM" by you, except for this ttm_tt_is_populated(). So do I 
have an Acked-by: on that now?

That is

https://patchwork.freedesktop.org/patch/435833/?series=90681&rev=2

plus the check added?

Thanks,

Thomas




>>
>> Thanks,
>> Thomas
>>
>>> Thanks,
>>> Christian.
>>>
>>>> /Thomas
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>> Christian.
>>>>>
>>>>>> /Thomas
>>>>>>
>>
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
  2021-05-28 14:17                   ` [Intel-gfx] " Thomas Hellström
@ 2021-05-28 14:21                     ` Christian König
  -1 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-28 14:21 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel



Am 28.05.21 um 16:17 schrieb Thomas Hellström:
>
> On 5/28/21 4:10 PM, Christian König wrote:
>> Am 28.05.21 um 09:33 schrieb Thomas Hellström:
>>> On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
>>>> Am 27.05.21 um 17:51 schrieb Thomas Hellström:
>>>>> On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
>>>>>> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
>>>>>>> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>>>>>>>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>>>>>>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>>>>>>>> The swapping code was dereferencing bo->ttm pointers
>>>>>>>>>> without
>>>>>>>>>> having
>>>>>>>>>> the
>>>>>>>>>> dma-resv lock held. Also it might try to swap out
>>>>>>>>>> unpopulated
>>>>>>>>>> bos.
>>>>>>>>>>
>>>>>>>>>> Fix this by moving the bo->ttm dereference until we have
>>>>>>>>>> the
>>>>>>>>>> reservation
>>>>>>>>>> lock. Check that the ttm_tt is populated after the
>>>>>>>>>> swap_notify
>>>>>>>>>> callback.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Thomas Hellström
>>>>>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>>>>>> ---
>>>>>>>>>>      drivers/gpu/drm/ttm/ttm_bo.c     | 16
>>>>>>>>>> +++++++++++++++-
>>>>>>>>>>      drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>>>>>>>      2 files changed, 18 insertions(+), 6 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> index 9f53506a82fc..86213d37657b 100644
>>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>>>>>>>> ttm_buffer_object
>>>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>>>            if (!ttm_bo_evict_swapout_allowable(bo, ctx,
>>>>>>>>>> &place,
>>>>>>>>>> &locked, NULL))
>>>>>>>>>>                    return -EBUSY;
>>>>>>>>>> +       dma_resv_assert_held(bo->base.resv);
>>>>>>>>>> +
>>>>>>>>>> +       if (!bo->ttm ||
>>>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
>>>>>>>>>> {
>>>>>>>>>> +               if (locked)
>>>>>>>>>> +                       dma_resv_unlock(bo->base.resv);
>>>>>>>>>> +               return -EBUSY;
>>>>>>>>>> +       }
>>>>>>>>>> +
>>>>>>>>>>            if (!ttm_bo_get_unless_zero(bo)) {
>>>>>>>>>>                    if (locked)
>>>>>>>>>>                            dma_resv_unlock(bo->base.resv);
>>>>>>>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
>>>>>>>>>> ttm_buffer_object
>>>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>>>            if (bo->bdev->funcs->swap_notify)
>>>>>>>>>>                    bo->bdev->funcs->swap_notify(bo);
>>>>>>>>>>      -       ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>>>> gfp_flags);
>>>>>>>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>>>>>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>>>> gfp_flags);
>>>>>>>>> Exactly that is what I won't recommend. We would try to
>>>>>>>>> swap
>>>>>>>>> out
>>>>>>>>> the
>>>>>>>>> same BO over and over again with that.
>>>>>>>> But we wouldn't since the BO is taken off the LRU and never
>>>>>>>> re-
>>>>>>>> added,
>>>>>>>>
>>>>>>>>
>>>>>>> In fact, we'd probably want to take the !bo->ttm bos off
>>>>>>> the
>>>>>>> LRU
>>>>>>> as well..
>>>>>> No, we don't want to take any BOs off the LRU unless they are
>>>>>> pinned.
>>>>>>
>>>>>> Adding a TT object or populating it doesn't necessarily put the
>>>>>> BO
>>>>>> back
>>>>>> to the LRU.
>>>>> OK, but swapped bos are also taken off the LRU list so these
>>>>> unpopulated bos are just taking the same path. Only difference to
>>>>> swapped is that they don't get read back on re-populate, but
>>>>> typically
>>>>> cleared.
>>>>>
>>>>> But what would be the point of keeping swapped-out bos on the LRU
>>>>> list?, particularly when we're iterating under a spinlock?
>>>>> Shouldn't we try to re-add to LRU (if not already on an LRU) just
>>>>> before populating? There aren't really that many calls in core TTM.
>>>> I want to avoid removing BOs from the LRU as much as possible since
>>>> we
>>>> forgot in multiple places that we want to re-add them.
>>>>
>>>> Conceptually, I think the swapped BOs should have a separate memory
>>>> domain,
>>>> this way we can ignore them cleanly when swapping things out.
>>> Yes, that would of course work as well. Keeping them on the system LRU
>>> is IMO highly undesirable.
>>>
>>>> Going to pick this patch up, modifying it a bit more and then pushing
>>>> it
>>>> to drm-misc-fixes for upstreaming.
>>> OK, I dropped the TTM fix for the purge-in-swap-notify from the i915
>>> series, hoping that the reworked variant of this patch lands first.
>>
>> You will still need to add the second ttm_tt_is_populated() check since
>> I dropped that for the backport which I want to push to -fixes.
>>
>> Regards,
>> Christian.
>>
> OK, great. then you have my S-O-B on this patch.
>
> BTW that original patch that added the ttm_tt_is_populated() was 
> considered "LGTM" by you, except for this ttm_tt_is_populated(). So do 
> I have an Acked-by: on that now?
>
> That is
>
> https://patchwork.freedesktop.org/patch/435833/?series=90681&rev=2
>
>
> plus the check added?

Yeah, sure.

Christian.

>
> Thanks,
>
> Thomas
>
>
>
>
>>>
>>> Thanks,
>>> Thomas
>>>
>>>> Thanks,
>>>> Christian.
>>>>
>>>>> /Thomas
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>> Christian.
>>>>>>
>>>>>>> /Thomas
>>>>>>>
>>>
>>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [Intel-gfx] [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory
@ 2021-05-28 14:21                     ` Christian König
  0 siblings, 0 replies; 25+ messages in thread
From: Christian König @ 2021-05-28 14:21 UTC (permalink / raw)
  To: Thomas Hellström, intel-gfx, dri-devel



Am 28.05.21 um 16:17 schrieb Thomas Hellström:
>
> On 5/28/21 4:10 PM, Christian König wrote:
>> Am 28.05.21 um 09:33 schrieb Thomas Hellström:
>>> On Fri, 2021-05-28 at 09:16 +0200, Christian König wrote:
>>>> Am 27.05.21 um 17:51 schrieb Thomas Hellström:
>>>>> On Thu, 2021-05-27 at 17:32 +0200, Christian König wrote:
>>>>>> Am 27.05.21 um 17:05 schrieb Thomas Hellström:
>>>>>>> On Thu, 2021-05-27 at 17:01 +0200, Thomas Hellström wrote:
>>>>>>>> On Thu, 2021-05-27 at 16:54 +0200, Christian König wrote:
>>>>>>>>> Am 27.05.21 um 16:19 schrieb Thomas Hellström:
>>>>>>>>>> The swapping code was dereferencing bo->ttm pointers
>>>>>>>>>> without
>>>>>>>>>> having
>>>>>>>>>> the
>>>>>>>>>> dma-resv lock held. Also it might try to swap out
>>>>>>>>>> unpopulated
>>>>>>>>>> bos.
>>>>>>>>>>
>>>>>>>>>> Fix this by moving the bo->ttm dereference until we have
>>>>>>>>>> the
>>>>>>>>>> reservation
>>>>>>>>>> lock. Check that the ttm_tt is populated after the
>>>>>>>>>> swap_notify
>>>>>>>>>> callback.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Thomas Hellström
>>>>>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>>>>>> ---
>>>>>>>>>>      drivers/gpu/drm/ttm/ttm_bo.c     | 16
>>>>>>>>>> +++++++++++++++-
>>>>>>>>>>      drivers/gpu/drm/ttm/ttm_device.c |  8 +++-----
>>>>>>>>>>      2 files changed, 18 insertions(+), 6 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> index 9f53506a82fc..86213d37657b 100644
>>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>>>>>>> @@ -1163,6 +1163,16 @@ int ttm_bo_swapout(struct
>>>>>>>>>> ttm_buffer_object
>>>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>>>            if (!ttm_bo_evict_swapout_allowable(bo, ctx,
>>>>>>>>>> &place,
>>>>>>>>>> &locked, NULL))
>>>>>>>>>>                    return -EBUSY;
>>>>>>>>>> +       dma_resv_assert_held(bo->base.resv);
>>>>>>>>>> +
>>>>>>>>>> +       if (!bo->ttm ||
>>>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
>>>>>>>>>> +           bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
>>>>>>>>>> {
>>>>>>>>>> +               if (locked)
>>>>>>>>>> +                       dma_resv_unlock(bo->base.resv);
>>>>>>>>>> +               return -EBUSY;
>>>>>>>>>> +       }
>>>>>>>>>> +
>>>>>>>>>>            if (!ttm_bo_get_unless_zero(bo)) {
>>>>>>>>>>                    if (locked)
>>>>>>>>>>                            dma_resv_unlock(bo->base.resv);
>>>>>>>>>> @@ -1215,7 +1225,8 @@ int ttm_bo_swapout(struct
>>>>>>>>>> ttm_buffer_object
>>>>>>>>>> *bo, struct ttm_operation_ctx *ctx,
>>>>>>>>>>            if (bo->bdev->funcs->swap_notify)
>>>>>>>>>>                    bo->bdev->funcs->swap_notify(bo);
>>>>>>>>>>      -       ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>>>> gfp_flags);
>>>>>>>>>> +       if (ttm_tt_is_populated(bo->ttm))
>>>>>>>>>> +               ret = ttm_tt_swapout(bo->bdev, bo->ttm,
>>>>>>>>>> gfp_flags);
>>>>>>>>> Exactly that is what I won't recommend. We would try to
>>>>>>>>> swap
>>>>>>>>> out
>>>>>>>>> the
>>>>>>>>> same BO over and over again with that.
>>>>>>>> But we wouldn't since the BO is taken off the LRU and never
>>>>>>>> re-
>>>>>>>> added,
>>>>>>>>
>>>>>>>>
>>>>>>> In fact, we'd probably want to take the !bo->ttm bos off
>>>>>>> the
>>>>>>> LRU
>>>>>>> as well..
>>>>>> No, we don't want to take any BOs off the LRU unless they are
>>>>>> pinned.
>>>>>>
>>>>>> Adding a TT object or populating it doesn't necessarily put the
>>>>>> BO
>>>>>> back
>>>>>> to the LRU.
>>>>> OK, but swapped bos are also taken off the LRU list so these
>>>>> unpopulated bos are just taking the same path. Only difference to
>>>>> swapped is that they don't get read back on re-populate, but
>>>>> typically
>>>>> cleared.
>>>>>
>>>>> But what would be the point of keeping swapped-out bos on the LRU
>>>>> list?, particularly when we're iterating under a spinlock?
>>>>> Shouldn't we try to re-add to LRU (if not already on an LRU) just
>>>>> before populating? There aren't really that many calls in core TTM.
>>>> I want to avoid removing BOs from the LRU as much as possible since
>>>> we
>>>> forgot in multiple places that we want to re-add them.
>>>>
>>>> Conceptually, I think the swapped BOs should have a separate memory
>>>> domain,
>>>> this way we can ignore them cleanly when swapping things out.
>>> Yes, that would of course work as well. Keeping them on the system LRU
>>> is IMO highly undesirable.
>>>
>>>> Going to pick this patch up, modifying it a bit more and then pushing
>>>> it
>>>> to drm-misc-fixes for upstreaming.
>>> OK, I dropped the TTM fix for the purge-in-swap-notify from the i915
>>> series, hoping that the reworked variant of this patch lands first.
>>
>> You will still need to add the second ttm_tt_is_populated() check since
>> I dropped that for the backport which I want to push to -fixes.
>>
>> Regards,
>> Christian.
>>
> OK, great. then you have my S-O-B on this patch.
>
> BTW that original patch that added the ttm_tt_is_populated() was 
> considered "LGTM" by you, except for this ttm_tt_is_populated(). So do 
> I have an Acked-by: on that now?
>
> That is
>
> https://patchwork.freedesktop.org/patch/435833/?series=90681&rev=2
>
>
> plus the check added?

Yeah, sure.

Christian.

>
> Thanks,
>
> Thomas
>
>
>
>
>>>
>>> Thanks,
>>> Thomas
>>>
>>>> Thanks,
>>>> Christian.
>>>>
>>>>> /Thomas
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>> Christian.
>>>>>>
>>>>>>> /Thomas
>>>>>>>
>>>
>>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2021-05-28 14:21 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-27 14:19 [RFC PATCH] drm/ttm: Fix swapping dereferences of freed memory Thomas Hellström
2021-05-27 14:19 ` [Intel-gfx] " Thomas Hellström
2021-05-27 14:54 ` Christian König
2021-05-27 14:54   ` [Intel-gfx] " Christian König
2021-05-27 15:01   ` Thomas Hellström
2021-05-27 15:01     ` [Intel-gfx] " Thomas Hellström
2021-05-27 15:05     ` Thomas Hellström
2021-05-27 15:05       ` [Intel-gfx] " Thomas Hellström
2021-05-27 15:32       ` Christian König
2021-05-27 15:32         ` [Intel-gfx] " Christian König
2021-05-27 15:51         ` Thomas Hellström
2021-05-27 15:51           ` [Intel-gfx] " Thomas Hellström
2021-05-28  7:16           ` Christian König
2021-05-28  7:16             ` [Intel-gfx] " Christian König
2021-05-28  7:33             ` Thomas Hellström
2021-05-28  7:33               ` [Intel-gfx] " Thomas Hellström
2021-05-28 14:10               ` Christian König
2021-05-28 14:10                 ` [Intel-gfx] " Christian König
2021-05-28 14:17                 ` Thomas Hellström
2021-05-28 14:17                   ` [Intel-gfx] " Thomas Hellström
2021-05-28 14:21                   ` Christian König
2021-05-28 14:21                     ` [Intel-gfx] " Christian König
2021-05-27 15:17     ` Christian König
2021-05-27 15:17       ` [Intel-gfx] " Christian König
2021-05-27 19:58 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.