All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
@ 2018-09-12 19:23 Christian König
  2018-09-13  8:35 ` Michel Dänzer
       [not found] ` <20180912192355.2359-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 2 replies; 14+ messages in thread
From: Christian König @ 2018-09-12 19:23 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

While cutting the lists we sometimes accidentally added a list_head from
the stack to the LRUs, effectively corrupting the list.

Remove the list cutting and use explicit list manipulation instead.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 138c98902033..b2a33bf1ef10 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
 
-static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
-				    struct list_head *lru, bool is_swap)
+static void ttm_list_move_bulk_tail(struct list_head *list,
+				    struct list_head *first,
+				    struct list_head *last)
 {
-	struct list_head *list;
-	LIST_HEAD(entries);
-	LIST_HEAD(before);
+	first->prev->next = last->next;
+	last->next->prev = first->prev;
 
-	reservation_object_assert_held(pos->last->resv);
-	list = is_swap ? &pos->last->swap : &pos->last->lru;
-	list_cut_position(&entries, lru, list);
+	list->prev->next = first;
+	first->prev = list->prev;
 
-	reservation_object_assert_held(pos->first->resv);
-	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
-	list_cut_position(&before, &entries, list);
-
-	list_splice(&before, lru);
-	list_splice_tail(&entries, lru);
+	last->next = list;
+	list->prev = last;
 }
 
 void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
@@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
 	unsigned i;
 
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
 		struct ttm_mem_type_manager *man;
 
-		if (!bulk->tt[i].first)
+		if (!pos->first)
 			continue;
 
-		man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
-		ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
+		reservation_object_assert_held(pos->first->resv);
+		reservation_object_assert_held(pos->last->resv);
+
+		man = &pos->first->bdev->man[TTM_PL_TT];
+		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
+					&pos->last->lru);
 	}
 
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
+		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
 		struct ttm_mem_type_manager *man;
 
-		if (!bulk->vram[i].first)
+		if (!pos->first)
 			continue;
 
-		man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
-		ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
+		reservation_object_assert_held(pos->first->resv);
+		reservation_object_assert_held(pos->last->resv);
+
+		man = &pos->first->bdev->man[TTM_PL_VRAM];
+		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
+					&pos->last->lru);
 	}
 
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
@@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
 		if (!pos->first)
 			continue;
 
+		reservation_object_assert_held(pos->first->resv);
+		reservation_object_assert_held(pos->last->resv);
+
 		lru = &pos->first->bdev->glob->swap_lru[i];
-		ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
+		ttm_list_move_bulk_tail(lru, &pos->first->swap,
+					&pos->last->swap);
 	}
 }
 EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
       [not found] ` <20180912192355.2359-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-12 19:42   ` Alex Deucher
  2018-09-13  8:31   ` Huang Rui
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 14+ messages in thread
From: Alex Deucher @ 2018-09-12 19:42 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list

On Wed, Sep 12, 2018 at 3:25 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> While cutting the lists we sometimes accidentally added a list_head from
> the stack to the LRUs, effectively corrupting the list.
>
> Remove the list cutting and use explicit list manipulation instead.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Acked-by: Alex Deucher <alexander.deucher@amd.com>

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
>  1 file changed, 30 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 138c98902033..b2a33bf1ef10 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>
> -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
> -                                   struct list_head *lru, bool is_swap)
> +static void ttm_list_move_bulk_tail(struct list_head *list,
> +                                   struct list_head *first,
> +                                   struct list_head *last)
>  {
> -       struct list_head *list;
> -       LIST_HEAD(entries);
> -       LIST_HEAD(before);
> +       first->prev->next = last->next;
> +       last->next->prev = first->prev;
>
> -       reservation_object_assert_held(pos->last->resv);
> -       list = is_swap ? &pos->last->swap : &pos->last->lru;
> -       list_cut_position(&entries, lru, list);
> +       list->prev->next = first;
> +       first->prev = list->prev;
>
> -       reservation_object_assert_held(pos->first->resv);
> -       list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> -       list_cut_position(&before, &entries, list);
> -
> -       list_splice(&before, lru);
> -       list_splice_tail(&entries, lru);
> +       last->next = list;
> +       list->prev = last;
>  }
>
>  void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>         unsigned i;
>
>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +               struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
>                 struct ttm_mem_type_manager *man;
>
> -               if (!bulk->tt[i].first)
> +               if (!pos->first)
>                         continue;
>
> -               man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
> -               ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
> +               reservation_object_assert_held(pos->first->resv);
> +               reservation_object_assert_held(pos->last->resv);
> +
> +               man = &pos->first->bdev->man[TTM_PL_TT];
> +               ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +                                       &pos->last->lru);
>         }
>
>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +               struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
>                 struct ttm_mem_type_manager *man;
>
> -               if (!bulk->vram[i].first)
> +               if (!pos->first)
>                         continue;
>
> -               man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
> -               ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
> +               reservation_object_assert_held(pos->first->resv);
> +               reservation_object_assert_held(pos->last->resv);
> +
> +               man = &pos->first->bdev->man[TTM_PL_VRAM];
> +               ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +                                       &pos->last->lru);
>         }
>
>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>                 if (!pos->first)
>                         continue;
>
> +               reservation_object_assert_held(pos->first->resv);
> +               reservation_object_assert_held(pos->last->resv);
> +
>                 lru = &pos->first->bdev->glob->swap_lru[i];
> -               ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
> +               ttm_list_move_bulk_tail(lru, &pos->first->swap,
> +                                       &pos->last->swap);
>         }
>  }
>  EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> --
> 2.14.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
       [not found] ` <20180912192355.2359-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-09-12 19:42   ` Alex Deucher
@ 2018-09-13  8:31   ` Huang Rui
  2018-09-13 11:32     ` Christian König
  2018-09-13  8:35   ` Michel Dänzer
  2018-09-13 14:25   ` Alex Deucher
  3 siblings, 1 reply; 14+ messages in thread
From: Huang Rui @ 2018-09-13  8:31 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Wed, Sep 12, 2018 at 09:23:55PM +0200, Christian König wrote:
> While cutting the lists we sometimes accidentally added a list_head from
> the stack to the LRUs, effectively corrupting the list.
> 
> Remove the list cutting and use explicit list manipulation instead.

This patch actually fixes the corruption bug. Was it a defect of
list_cut_position or list_splice handlers?

Reviewed-and-Tested: Huang Rui <ray.huang@amd.com>

> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
>  1 file changed, 30 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 138c98902033..b2a33bf1ef10 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  
> -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
> -				    struct list_head *lru, bool is_swap)
> +static void ttm_list_move_bulk_tail(struct list_head *list,
> +				    struct list_head *first,
> +				    struct list_head *last)
>  {
> -	struct list_head *list;
> -	LIST_HEAD(entries);
> -	LIST_HEAD(before);
> +	first->prev->next = last->next;
> +	last->next->prev = first->prev;
>  
> -	reservation_object_assert_held(pos->last->resv);
> -	list = is_swap ? &pos->last->swap : &pos->last->lru;
> -	list_cut_position(&entries, lru, list);
> +	list->prev->next = first;
> +	first->prev = list->prev;
>  
> -	reservation_object_assert_held(pos->first->resv);
> -	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> -	list_cut_position(&before, &entries, list);
> -
> -	list_splice(&before, lru);
> -	list_splice_tail(&entries, lru);
> +	last->next = list;
> +	list->prev = last;
>  }
>  
>  void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>  	unsigned i;
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
>  		struct ttm_mem_type_manager *man;
>  
> -		if (!bulk->tt[i].first)
> +		if (!pos->first)
>  			continue;
>  
> -		man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
> -		ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
> +		reservation_object_assert_held(pos->first->resv);
> +		reservation_object_assert_held(pos->last->resv);
> +
> +		man = &pos->first->bdev->man[TTM_PL_TT];
> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +					&pos->last->lru);
>  	}
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
>  		struct ttm_mem_type_manager *man;
>  
> -		if (!bulk->vram[i].first)
> +		if (!pos->first)
>  			continue;
>  
> -		man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
> -		ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
> +		reservation_object_assert_held(pos->first->resv);
> +		reservation_object_assert_held(pos->last->resv);
> +
> +		man = &pos->first->bdev->man[TTM_PL_VRAM];
> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +					&pos->last->lru);
>  	}
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>  		if (!pos->first)
>  			continue;
>  
> +		reservation_object_assert_held(pos->first->resv);
> +		reservation_object_assert_held(pos->last->resv);
> +
>  		lru = &pos->first->bdev->glob->swap_lru[i];
> -		ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
> +		ttm_list_move_bulk_tail(lru, &pos->first->swap,
> +					&pos->last->swap);
>  	}
>  }
>  EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
  2018-09-12 19:23 [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail Christian König
@ 2018-09-13  8:35 ` Michel Dänzer
  2018-09-13  8:55   ` Christian König
       [not found] ` <20180912192355.2359-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 14+ messages in thread
From: Michel Dänzer @ 2018-09-13  8:35 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel


[ Moving to dri-devel, where TTM patches are reviewed ]

On 2018-09-12 9:23 p.m., Christian König wrote:
> While cutting the lists we sometimes accidentally added a list_head from
> the stack to the LRUs, effectively corrupting the list.
> 
> Remove the list cutting and use explicit list manipulation instead.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
>  1 file changed, 30 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 138c98902033..b2a33bf1ef10 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  
> -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
> -				    struct list_head *lru, bool is_swap)
> +static void ttm_list_move_bulk_tail(struct list_head *list,
> +				    struct list_head *first,
> +				    struct list_head *last)
>  {
> -	struct list_head *list;
> -	LIST_HEAD(entries);
> -	LIST_HEAD(before);
> +	first->prev->next = last->next;
> +	last->next->prev = first->prev;
>  
> -	reservation_object_assert_held(pos->last->resv);
> -	list = is_swap ? &pos->last->swap : &pos->last->lru;
> -	list_cut_position(&entries, lru, list);
> +	list->prev->next = first;
> +	first->prev = list->prev;
>  
> -	reservation_object_assert_held(pos->first->resv);
> -	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> -	list_cut_position(&before, &entries, list);
> -
> -	list_splice(&before, lru);
> -	list_splice_tail(&entries, lru);
> +	last->next = list;
> +	list->prev = last;
>  }
>  
>  void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>  	unsigned i;
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
>  		struct ttm_mem_type_manager *man;
>  
> -		if (!bulk->tt[i].first)
> +		if (!pos->first)
>  			continue;
>  
> -		man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
> -		ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
> +		reservation_object_assert_held(pos->first->resv);
> +		reservation_object_assert_held(pos->last->resv);
> +
> +		man = &pos->first->bdev->man[TTM_PL_TT];
> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +					&pos->last->lru);
>  	}
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
>  		struct ttm_mem_type_manager *man;
>  
> -		if (!bulk->vram[i].first)
> +		if (!pos->first)
>  			continue;
>  
> -		man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
> -		ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
> +		reservation_object_assert_held(pos->first->resv);
> +		reservation_object_assert_held(pos->last->resv);
> +
> +		man = &pos->first->bdev->man[TTM_PL_VRAM];
> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +					&pos->last->lru);
>  	}
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>  		if (!pos->first)
>  			continue;
>  
> +		reservation_object_assert_held(pos->first->resv);
> +		reservation_object_assert_held(pos->last->resv);
> +
>  		lru = &pos->first->bdev->glob->swap_lru[i];
> -		ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
> +		ttm_list_move_bulk_tail(lru, &pos->first->swap,
> +					&pos->last->swap);
>  	}
>  }
>  EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> 

Seems like more code could be kept in / moved into the shared helper
function. That would keep the ttm_bo_bulk_move_lru_tail code leaner, and
might make the helper function changes easier to review as well.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
       [not found] ` <20180912192355.2359-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-09-12 19:42   ` Alex Deucher
  2018-09-13  8:31   ` Huang Rui
@ 2018-09-13  8:35   ` Michel Dänzer
  2018-09-13 14:25   ` Alex Deucher
  3 siblings, 0 replies; 14+ messages in thread
From: Michel Dänzer @ 2018-09-13  8:35 UTC (permalink / raw)
  To: Christian König; +Cc: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[ Moving to dri-devel, where TTM patches are reviewed ]

On 2018-09-12 9:23 p.m., Christian König wrote:
> While cutting the lists we sometimes accidentally added a list_head from
> the stack to the LRUs, effectively corrupting the list.
> 
> Remove the list cutting and use explicit list manipulation instead.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
>  1 file changed, 30 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 138c98902033..b2a33bf1ef10 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  
> -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
> -				    struct list_head *lru, bool is_swap)
> +static void ttm_list_move_bulk_tail(struct list_head *list,
> +				    struct list_head *first,
> +				    struct list_head *last)
>  {
> -	struct list_head *list;
> -	LIST_HEAD(entries);
> -	LIST_HEAD(before);
> +	first->prev->next = last->next;
> +	last->next->prev = first->prev;
>  
> -	reservation_object_assert_held(pos->last->resv);
> -	list = is_swap ? &pos->last->swap : &pos->last->lru;
> -	list_cut_position(&entries, lru, list);
> +	list->prev->next = first;
> +	first->prev = list->prev;
>  
> -	reservation_object_assert_held(pos->first->resv);
> -	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> -	list_cut_position(&before, &entries, list);
> -
> -	list_splice(&before, lru);
> -	list_splice_tail(&entries, lru);
> +	last->next = list;
> +	list->prev = last;
>  }
>  
>  void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>  	unsigned i;
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
>  		struct ttm_mem_type_manager *man;
>  
> -		if (!bulk->tt[i].first)
> +		if (!pos->first)
>  			continue;
>  
> -		man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
> -		ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
> +		reservation_object_assert_held(pos->first->resv);
> +		reservation_object_assert_held(pos->last->resv);
> +
> +		man = &pos->first->bdev->man[TTM_PL_TT];
> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +					&pos->last->lru);
>  	}
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
>  		struct ttm_mem_type_manager *man;
>  
> -		if (!bulk->vram[i].first)
> +		if (!pos->first)
>  			continue;
>  
> -		man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
> -		ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
> +		reservation_object_assert_held(pos->first->resv);
> +		reservation_object_assert_held(pos->last->resv);
> +
> +		man = &pos->first->bdev->man[TTM_PL_VRAM];
> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +					&pos->last->lru);
>  	}
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>  		if (!pos->first)
>  			continue;
>  
> +		reservation_object_assert_held(pos->first->resv);
> +		reservation_object_assert_held(pos->last->resv);
> +
>  		lru = &pos->first->bdev->glob->swap_lru[i];
> -		ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
> +		ttm_list_move_bulk_tail(lru, &pos->first->swap,
> +					&pos->last->swap);
>  	}
>  }
>  EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> 

Seems like more code could be kept in / moved into the shared helper
function. That would keep the ttm_bo_bulk_move_lru_tail code leaner, and
might make the helper function changes easier to review as well.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
  2018-09-13  8:35 ` Michel Dänzer
@ 2018-09-13  8:55   ` Christian König
  2018-09-13  9:00     ` Michel Dänzer
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2018-09-13  8:55 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: dri-devel

Am 13.09.2018 um 10:35 schrieb Michel Dänzer:
> [ Moving to dri-devel, where TTM patches are reviewed ]
>
> On 2018-09-12 9:23 p.m., Christian König wrote:
>> While cutting the lists we sometimes accidentally added a list_head from
>> the stack to the LRUs, effectively corrupting the list.
>>
>> Remove the list cutting and use explicit list manipulation instead.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
>>   1 file changed, 30 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 138c98902033..b2a33bf1ef10 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>>   }
>>   EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>>   
>> -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>> -				    struct list_head *lru, bool is_swap)
>> +static void ttm_list_move_bulk_tail(struct list_head *list,
>> +				    struct list_head *first,
>> +				    struct list_head *last)
>>   {
>> -	struct list_head *list;
>> -	LIST_HEAD(entries);
>> -	LIST_HEAD(before);
>> +	first->prev->next = last->next;
>> +	last->next->prev = first->prev;
>>   
>> -	reservation_object_assert_held(pos->last->resv);
>> -	list = is_swap ? &pos->last->swap : &pos->last->lru;
>> -	list_cut_position(&entries, lru, list);
>> +	list->prev->next = first;
>> +	first->prev = list->prev;
>>   
>> -	reservation_object_assert_held(pos->first->resv);
>> -	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>> -	list_cut_position(&before, &entries, list);
>> -
>> -	list_splice(&before, lru);
>> -	list_splice_tail(&entries, lru);
>> +	last->next = list;
>> +	list->prev = last;
>>   }
>>   
>>   void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>>   	unsigned i;
>>   
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> +		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
>>   		struct ttm_mem_type_manager *man;
>>   
>> -		if (!bulk->tt[i].first)
>> +		if (!pos->first)
>>   			continue;
>>   
>> -		man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
>> -		ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
>> +		reservation_object_assert_held(pos->first->resv);
>> +		reservation_object_assert_held(pos->last->resv);
>> +
>> +		man = &pos->first->bdev->man[TTM_PL_TT];
>> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
>> +					&pos->last->lru);
>>   	}
>>   
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> +		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
>>   		struct ttm_mem_type_manager *man;
>>   
>> -		if (!bulk->vram[i].first)
>> +		if (!pos->first)
>>   			continue;
>>   
>> -		man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
>> -		ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
>> +		reservation_object_assert_held(pos->first->resv);
>> +		reservation_object_assert_held(pos->last->resv);
>> +
>> +		man = &pos->first->bdev->man[TTM_PL_VRAM];
>> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
>> +					&pos->last->lru);
>>   	}
>>   
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>>   		if (!pos->first)
>>   			continue;
>>   
>> +		reservation_object_assert_held(pos->first->resv);
>> +		reservation_object_assert_held(pos->last->resv);
>> +
>>   		lru = &pos->first->bdev->glob->swap_lru[i];
>> -		ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
>> +		ttm_list_move_bulk_tail(lru, &pos->first->swap,
>> +					&pos->last->swap);
>>   	}
>>   }
>>   EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
>>
> Seems like more code could be kept in / moved into the shared helper
> function. That would keep the ttm_bo_bulk_move_lru_tail code leaner, and
> might make the helper function changes easier to review as well.

Yeah, actually only wanted to send that to Rui and you for testing.

Going to clean that up and send it for upstream review today.

Christian.
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
  2018-09-13  8:55   ` Christian König
@ 2018-09-13  9:00     ` Michel Dänzer
  0 siblings, 0 replies; 14+ messages in thread
From: Michel Dänzer @ 2018-09-13  9:00 UTC (permalink / raw)
  To: christian.koenig; +Cc: dri-devel

On 2018-09-13 10:55 a.m., Christian König wrote:
> Am 13.09.2018 um 10:35 schrieb Michel Dänzer:
>> [ Moving to dri-devel, where TTM patches are reviewed ]
>>
>> On 2018-09-12 9:23 p.m., Christian König wrote:
>>> While cutting the lists we sometimes accidentally added a list_head from
>>> the stack to the LRUs, effectively corrupting the list.
>>>
>>> Remove the list cutting and use explicit list manipulation instead.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/ttm/ttm_bo.c | 51
>>> ++++++++++++++++++++++++++------------------
>>>   1 file changed, 30 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>> index 138c98902033..b2a33bf1ef10 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct
>>> ttm_buffer_object *bo,
>>>   }
>>>   EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>>>   -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos
>>> *pos,
>>> -                    struct list_head *lru, bool is_swap)
>>> +static void ttm_list_move_bulk_tail(struct list_head *list,
>>> +                    struct list_head *first,
>>> +                    struct list_head *last)
>>>   {
>>> -    struct list_head *list;
>>> -    LIST_HEAD(entries);
>>> -    LIST_HEAD(before);
>>> +    first->prev->next = last->next;
>>> +    last->next->prev = first->prev;
>>>   -    reservation_object_assert_held(pos->last->resv);
>>> -    list = is_swap ? &pos->last->swap : &pos->last->lru;
>>> -    list_cut_position(&entries, lru, list);
>>> +    list->prev->next = first;
>>> +    first->prev = list->prev;
>>>   -    reservation_object_assert_held(pos->first->resv);
>>> -    list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>> -    list_cut_position(&before, &entries, list);
>>> -
>>> -    list_splice(&before, lru);
>>> -    list_splice_tail(&entries, lru);
>>> +    last->next = list;
>>> +    list->prev = last;
>>>   }
>>>     void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>>> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct
>>> ttm_lru_bulk_move *bulk)
>>>       unsigned i;
>>>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>> +        struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
>>>           struct ttm_mem_type_manager *man;
>>>   -        if (!bulk->tt[i].first)
>>> +        if (!pos->first)
>>>               continue;
>>>   -        man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
>>> -        ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
>>> +        reservation_object_assert_held(pos->first->resv);
>>> +        reservation_object_assert_held(pos->last->resv);
>>> +
>>> +        man = &pos->first->bdev->man[TTM_PL_TT];
>>> +        ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
>>> +                    &pos->last->lru);
>>>       }
>>>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>> +        struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
>>>           struct ttm_mem_type_manager *man;
>>>   -        if (!bulk->vram[i].first)
>>> +        if (!pos->first)
>>>               continue;
>>>   -        man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
>>> -        ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
>>> +        reservation_object_assert_held(pos->first->resv);
>>> +        reservation_object_assert_held(pos->last->resv);
>>> +
>>> +        man = &pos->first->bdev->man[TTM_PL_VRAM];
>>> +        ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
>>> +                    &pos->last->lru);
>>>       }
>>>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>>> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct
>>> ttm_lru_bulk_move *bulk)
>>>           if (!pos->first)
>>>               continue;
>>>   +        reservation_object_assert_held(pos->first->resv);
>>> +        reservation_object_assert_held(pos->last->resv);
>>> +
>>>           lru = &pos->first->bdev->glob->swap_lru[i];
>>> -        ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
>>> +        ttm_list_move_bulk_tail(lru, &pos->first->swap,
>>> +                    &pos->last->swap);
>>>       }
>>>   }
>>>   EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
>>>
>> Seems like more code could be kept in / moved into the shared helper
>> function. That would keep the ttm_bo_bulk_move_lru_tail code leaner, and
>> might make the helper function changes easier to review as well.
> 
> Yeah, actually only wanted to send that to Rui and you for testing.
> 
> Going to clean that up and send it for upstream review today.

Sounds good, but Tom needs to test it as well.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
  2018-09-13  8:31   ` Huang Rui
@ 2018-09-13 11:32     ` Christian König
       [not found]       ` <c2471ef9-1bc7-517e-27f6-a00236a9b40b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2018-09-13 11:32 UTC (permalink / raw)
  To: Huang Rui; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 13.09.2018 um 10:31 schrieb Huang Rui:
> On Wed, Sep 12, 2018 at 09:23:55PM +0200, Christian König wrote:
>> While cutting the lists we sometimes accidentally added a list_head from
>> the stack to the LRUs, effectively corrupting the list.
>>
>> Remove the list cutting and use explicit list manipulation instead.
> This patch actually fixes the corruption bug. Was it a defect of
> list_cut_position or list_splice handlers?

We somehow did something illegal with list_cut_position. I haven't 
narrowed it down till the end, but we ended up with list_heads from the 
stack to the lru.

Anyway adding a specialized list bulk move function is much simpler and 
avoids the issue.

I've just split that up as Michel suggested and send it out to the 
mailing lists, please review that version once more.

Thanks,
Christian.

>
> Reviewed-and-Tested: Huang Rui <ray.huang@amd.com>
>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
>>   1 file changed, 30 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 138c98902033..b2a33bf1ef10 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>>   }
>>   EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>>   
>> -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>> -				    struct list_head *lru, bool is_swap)
>> +static void ttm_list_move_bulk_tail(struct list_head *list,
>> +				    struct list_head *first,
>> +				    struct list_head *last)
>>   {
>> -	struct list_head *list;
>> -	LIST_HEAD(entries);
>> -	LIST_HEAD(before);
>> +	first->prev->next = last->next;
>> +	last->next->prev = first->prev;
>>   
>> -	reservation_object_assert_held(pos->last->resv);
>> -	list = is_swap ? &pos->last->swap : &pos->last->lru;
>> -	list_cut_position(&entries, lru, list);
>> +	list->prev->next = first;
>> +	first->prev = list->prev;
>>   
>> -	reservation_object_assert_held(pos->first->resv);
>> -	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>> -	list_cut_position(&before, &entries, list);
>> -
>> -	list_splice(&before, lru);
>> -	list_splice_tail(&entries, lru);
>> +	last->next = list;
>> +	list->prev = last;
>>   }
>>   
>>   void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>>   	unsigned i;
>>   
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> +		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
>>   		struct ttm_mem_type_manager *man;
>>   
>> -		if (!bulk->tt[i].first)
>> +		if (!pos->first)
>>   			continue;
>>   
>> -		man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
>> -		ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
>> +		reservation_object_assert_held(pos->first->resv);
>> +		reservation_object_assert_held(pos->last->resv);
>> +
>> +		man = &pos->first->bdev->man[TTM_PL_TT];
>> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
>> +					&pos->last->lru);
>>   	}
>>   
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> +		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
>>   		struct ttm_mem_type_manager *man;
>>   
>> -		if (!bulk->vram[i].first)
>> +		if (!pos->first)
>>   			continue;
>>   
>> -		man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
>> -		ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
>> +		reservation_object_assert_held(pos->first->resv);
>> +		reservation_object_assert_held(pos->last->resv);
>> +
>> +		man = &pos->first->bdev->man[TTM_PL_VRAM];
>> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
>> +					&pos->last->lru);
>>   	}
>>   
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>>   		if (!pos->first)
>>   			continue;
>>   
>> +		reservation_object_assert_held(pos->first->resv);
>> +		reservation_object_assert_held(pos->last->resv);
>> +
>>   		lru = &pos->first->bdev->glob->swap_lru[i];
>> -		ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
>> +		ttm_list_move_bulk_tail(lru, &pos->first->swap,
>> +					&pos->last->swap);
>>   	}
>>   }
>>   EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
>> -- 
>> 2.14.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
       [not found] ` <20180912192355.2359-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2018-09-13  8:35   ` Michel Dänzer
@ 2018-09-13 14:25   ` Alex Deucher
       [not found]     ` <CADnq5_PiVfsBjGxN0wsbHL0o5rWYRuVehTeZvN396kp=GVZTNA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  3 siblings, 1 reply; 14+ messages in thread
From: Alex Deucher @ 2018-09-13 14:25 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx list

On Wed, Sep 12, 2018 at 3:25 PM Christian König
<ckoenig.leichtzumerken@gmail.com> wrote:
>
> While cutting the lists we sometimes accidentally added a list_head from
> the stack to the LRUs, effectively corrupting the list.
>
> Remove the list cutting and use explicit list manipulation instead.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Can you apply this patch first and then do the clean up as a later
series or temporarily disable the feature?  I want to send out a -next
pull this week and I don't want to leave this broken.

Alex

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
>  1 file changed, 30 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 138c98902033..b2a33bf1ef10 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>  }
>  EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>
> -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
> -                                   struct list_head *lru, bool is_swap)
> +static void ttm_list_move_bulk_tail(struct list_head *list,
> +                                   struct list_head *first,
> +                                   struct list_head *last)
>  {
> -       struct list_head *list;
> -       LIST_HEAD(entries);
> -       LIST_HEAD(before);
> +       first->prev->next = last->next;
> +       last->next->prev = first->prev;
>
> -       reservation_object_assert_held(pos->last->resv);
> -       list = is_swap ? &pos->last->swap : &pos->last->lru;
> -       list_cut_position(&entries, lru, list);
> +       list->prev->next = first;
> +       first->prev = list->prev;
>
> -       reservation_object_assert_held(pos->first->resv);
> -       list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> -       list_cut_position(&before, &entries, list);
> -
> -       list_splice(&before, lru);
> -       list_splice_tail(&entries, lru);
> +       last->next = list;
> +       list->prev = last;
>  }
>
>  void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>         unsigned i;
>
>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +               struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
>                 struct ttm_mem_type_manager *man;
>
> -               if (!bulk->tt[i].first)
> +               if (!pos->first)
>                         continue;
>
> -               man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
> -               ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
> +               reservation_object_assert_held(pos->first->resv);
> +               reservation_object_assert_held(pos->last->resv);
> +
> +               man = &pos->first->bdev->man[TTM_PL_TT];
> +               ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +                                       &pos->last->lru);
>         }
>
>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> +               struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
>                 struct ttm_mem_type_manager *man;
>
> -               if (!bulk->vram[i].first)
> +               if (!pos->first)
>                         continue;
>
> -               man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
> -               ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
> +               reservation_object_assert_held(pos->first->resv);
> +               reservation_object_assert_held(pos->last->resv);
> +
> +               man = &pos->first->bdev->man[TTM_PL_VRAM];
> +               ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> +                                       &pos->last->lru);
>         }
>
>         for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>                 if (!pos->first)
>                         continue;
>
> +               reservation_object_assert_held(pos->first->resv);
> +               reservation_object_assert_held(pos->last->resv);
> +
>                 lru = &pos->first->bdev->glob->swap_lru[i];
> -               ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
> +               ttm_list_move_bulk_tail(lru, &pos->first->swap,
> +                                       &pos->last->swap);
>         }
>  }
>  EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> --
> 2.14.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
       [not found]     ` <CADnq5_PiVfsBjGxN0wsbHL0o5rWYRuVehTeZvN396kp=GVZTNA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2018-09-13 17:29       ` Christian König
  0 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2018-09-13 17:29 UTC (permalink / raw)
  To: Alex Deucher; +Cc: amd-gfx list

Am 13.09.2018 um 16:25 schrieb Alex Deucher:
> On Wed, Sep 12, 2018 at 3:25 PM Christian König
> <ckoenig.leichtzumerken@gmail.com> wrote:
>> While cutting the lists we sometimes accidentally added a list_head from
>> the stack to the LRUs, effectively corrupting the list.
>>
>> Remove the list cutting and use explicit list manipulation instead.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Can you apply this patch first and then do the clean up as a later
> series or temporarily disable the feature?  I want to send out a -next
> pull this week and I don't want to leave this broken.

Yeah, that should work as well.

Christian.

>
> Alex
>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
>>   1 file changed, 30 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 138c98902033..b2a33bf1ef10 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
>>   }
>>   EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>>
>> -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>> -                                   struct list_head *lru, bool is_swap)
>> +static void ttm_list_move_bulk_tail(struct list_head *list,
>> +                                   struct list_head *first,
>> +                                   struct list_head *last)
>>   {
>> -       struct list_head *list;
>> -       LIST_HEAD(entries);
>> -       LIST_HEAD(before);
>> +       first->prev->next = last->next;
>> +       last->next->prev = first->prev;
>>
>> -       reservation_object_assert_held(pos->last->resv);
>> -       list = is_swap ? &pos->last->swap : &pos->last->lru;
>> -       list_cut_position(&entries, lru, list);
>> +       list->prev->next = first;
>> +       first->prev = list->prev;
>>
>> -       reservation_object_assert_held(pos->first->resv);
>> -       list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>> -       list_cut_position(&before, &entries, list);
>> -
>> -       list_splice(&before, lru);
>> -       list_splice_tail(&entries, lru);
>> +       last->next = list;
>> +       list->prev = last;
>>   }
>>
>>   void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>>          unsigned i;
>>
>>          for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> +               struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
>>                  struct ttm_mem_type_manager *man;
>>
>> -               if (!bulk->tt[i].first)
>> +               if (!pos->first)
>>                          continue;
>>
>> -               man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
>> -               ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
>> +               reservation_object_assert_held(pos->first->resv);
>> +               reservation_object_assert_held(pos->last->resv);
>> +
>> +               man = &pos->first->bdev->man[TTM_PL_TT];
>> +               ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
>> +                                       &pos->last->lru);
>>          }
>>
>>          for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> +               struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
>>                  struct ttm_mem_type_manager *man;
>>
>> -               if (!bulk->vram[i].first)
>> +               if (!pos->first)
>>                          continue;
>>
>> -               man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
>> -               ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
>> +               reservation_object_assert_held(pos->first->resv);
>> +               reservation_object_assert_held(pos->last->resv);
>> +
>> +               man = &pos->first->bdev->man[TTM_PL_VRAM];
>> +               ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
>> +                                       &pos->last->lru);
>>          }
>>
>>          for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
>> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
>>                  if (!pos->first)
>>                          continue;
>>
>> +               reservation_object_assert_held(pos->first->resv);
>> +               reservation_object_assert_held(pos->last->resv);
>> +
>>                  lru = &pos->first->bdev->glob->swap_lru[i];
>> -               ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
>> +               ttm_list_move_bulk_tail(lru, &pos->first->swap,
>> +                                       &pos->last->swap);
>>          }
>>   }
>>   EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
>> --
>> 2.14.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
       [not found]       ` <c2471ef9-1bc7-517e-27f6-a00236a9b40b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-09-14  8:22         ` Huang Rui
  2018-09-14  9:22           ` Michel Dänzer
  0 siblings, 1 reply; 14+ messages in thread
From: Huang Rui @ 2018-09-14  8:22 UTC (permalink / raw)
  To: Koenig, Christian; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, Sep 13, 2018 at 07:32:24PM +0800, Christian König wrote:
> Am 13.09.2018 um 10:31 schrieb Huang Rui:
> > On Wed, Sep 12, 2018 at 09:23:55PM +0200, Christian König wrote:
> >> While cutting the lists we sometimes accidentally added a list_head from
> >> the stack to the LRUs, effectively corrupting the list.
> >>
> >> Remove the list cutting and use explicit list manipulation instead.
> > This patch actually fixes the corruption bug. Was it a defect of
> > list_cut_position or list_splice handlers?
> 
> We somehow did something illegal with list_cut_position. I haven't 
> narrowed it down till the end, but we ended up with list_heads from the 
> stack to the lru.
> 

I am confused, in theory, even we do any manipulation with list helper, it
should not trigger the list corruption. The usage of those helpers should
ensure the list operation safely...

> Anyway adding a specialized list bulk move function is much simpler and 
> avoids the issue.
> 
> I've just split that up as Michel suggested and send it out to the 
> mailing lists, please review that version once more.
> 

Sure, already reviewed.

> Thanks,
> Christian.
> 
> >
> > Reviewed-and-Tested: Huang Rui <ray.huang@amd.com>
> >
> >> Signed-off-by: Christian König <christian.koenig@amd.com>
> >> ---
> >>   drivers/gpu/drm/ttm/ttm_bo.c | 51 ++++++++++++++++++++++++++------------------
> >>   1 file changed, 30 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >> index 138c98902033..b2a33bf1ef10 100644
> >> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> >> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >> @@ -247,23 +247,18 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
> >>   }
> >>   EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
> >>   
> >> -static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
> >> -				    struct list_head *lru, bool is_swap)
> >> +static void ttm_list_move_bulk_tail(struct list_head *list,
> >> +				    struct list_head *first,
> >> +				    struct list_head *last)
> >>   {
> >> -	struct list_head *list;
> >> -	LIST_HEAD(entries);
> >> -	LIST_HEAD(before);
> >> +	first->prev->next = last->next;
> >> +	last->next->prev = first->prev;
> >>   
> >> -	reservation_object_assert_held(pos->last->resv);
> >> -	list = is_swap ? &pos->last->swap : &pos->last->lru;
> >> -	list_cut_position(&entries, lru, list);
> >> +	list->prev->next = first;
> >> +	first->prev = list->prev;
> >>   
> >> -	reservation_object_assert_held(pos->first->resv);
> >> -	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> >> -	list_cut_position(&before, &entries, list);
> >> -
> >> -	list_splice(&before, lru);
> >> -	list_splice_tail(&entries, lru);
> >> +	last->next = list;
> >> +	list->prev = last;
> >>   }
> >>   
> >>   void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> >> @@ -271,23 +266,33 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> >>   	unsigned i;
> >>   
> >>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >> +		struct ttm_lru_bulk_move_pos *pos = &bulk->tt[i];
> >>   		struct ttm_mem_type_manager *man;
> >>   
> >> -		if (!bulk->tt[i].first)
> >> +		if (!pos->first)
> >>   			continue;
> >>   
> >> -		man = &bulk->tt[i].first->bdev->man[TTM_PL_TT];
> >> -		ttm_bo_bulk_move_helper(&bulk->tt[i], &man->lru[i], false);
> >> +		reservation_object_assert_held(pos->first->resv);
> >> +		reservation_object_assert_held(pos->last->resv);
> >> +
> >> +		man = &pos->first->bdev->man[TTM_PL_TT];
> >> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> >> +					&pos->last->lru);
> >>   	}
> >>   
> >>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >> +		struct ttm_lru_bulk_move_pos *pos = &bulk->vram[i];
> >>   		struct ttm_mem_type_manager *man;
> >>   
> >> -		if (!bulk->vram[i].first)
> >> +		if (!pos->first)
> >>   			continue;
> >>   
> >> -		man = &bulk->vram[i].first->bdev->man[TTM_PL_VRAM];
> >> -		ttm_bo_bulk_move_helper(&bulk->vram[i], &man->lru[i], false);
> >> +		reservation_object_assert_held(pos->first->resv);
> >> +		reservation_object_assert_held(pos->last->resv);
> >> +
> >> +		man = &pos->first->bdev->man[TTM_PL_VRAM];
> >> +		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
> >> +					&pos->last->lru);
> >>   	}
> >>   
> >>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
> >> @@ -297,8 +302,12 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
> >>   		if (!pos->first)
> >>   			continue;
> >>   
> >> +		reservation_object_assert_held(pos->first->resv);
> >> +		reservation_object_assert_held(pos->last->resv);
> >> +
> >>   		lru = &pos->first->bdev->glob->swap_lru[i];
> >> -		ttm_bo_bulk_move_helper(&bulk->swap[i], lru, true);
> >> +		ttm_list_move_bulk_tail(lru, &pos->first->swap,
> >> +					&pos->last->swap);
> >>   	}
> >>   }
> >>   EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
> >> -- 
> >> 2.14.1
> >>
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
  2018-09-14  8:22         ` Huang Rui
@ 2018-09-14  9:22           ` Michel Dänzer
       [not found]             ` <4bf23786-ffdb-ee6e-1446-9dbc8d8ed5d6-otUistvHUpPR7s880joybQ@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Michel Dänzer @ 2018-09-14  9:22 UTC (permalink / raw)
  To: Huang Rui, Koenig, Christian; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2018-09-14 10:22 a.m., Huang Rui wrote:
> On Thu, Sep 13, 2018 at 07:32:24PM +0800, Christian König wrote:
>> Am 13.09.2018 um 10:31 schrieb Huang Rui:
>>> On Wed, Sep 12, 2018 at 09:23:55PM +0200, Christian König wrote:
>>>> While cutting the lists we sometimes accidentally added a list_head from
>>>> the stack to the LRUs, effectively corrupting the list.
>>>>
>>>> Remove the list cutting and use explicit list manipulation instead.
>>> This patch actually fixes the corruption bug. Was it a defect of
>>> list_cut_position or list_splice handlers?
>>
>> We somehow did something illegal with list_cut_position. I haven't 
>> narrowed it down till the end, but we ended up with list_heads from the 
>> stack to the lru.
> 
> I am confused, in theory, even we do any manipulation with list helper, it
> should not trigger the list corruption. The usage of those helpers should
> ensure the list operation safely...

There's nothing the helpers can do about being passed in pointers to
stack memory. It's a bug in the code using the helpers.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
       [not found]             ` <4bf23786-ffdb-ee6e-1446-9dbc8d8ed5d6-otUistvHUpPR7s880joybQ@public.gmane.org>
@ 2018-09-14 10:46               ` Christian König
  2018-09-17  9:40               ` Huang Rui
  1 sibling, 0 replies; 14+ messages in thread
From: Christian König @ 2018-09-14 10:46 UTC (permalink / raw)
  To: Michel Dänzer, Huang Rui; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 14.09.2018 um 11:22 schrieb Michel Dänzer:
> On 2018-09-14 10:22 a.m., Huang Rui wrote:
>> On Thu, Sep 13, 2018 at 07:32:24PM +0800, Christian König wrote:
>>> Am 13.09.2018 um 10:31 schrieb Huang Rui:
>>>> On Wed, Sep 12, 2018 at 09:23:55PM +0200, Christian König wrote:
>>>>> While cutting the lists we sometimes accidentally added a list_head from
>>>>> the stack to the LRUs, effectively corrupting the list.
>>>>>
>>>>> Remove the list cutting and use explicit list manipulation instead.
>>>> This patch actually fixes the corruption bug. Was it a defect of
>>>> list_cut_position or list_splice handlers?
>>> We somehow did something illegal with list_cut_position. I haven't
>>> narrowed it down till the end, but we ended up with list_heads from the
>>> stack to the lru.
>> I am confused, in theory, even we do any manipulation with list helper, it
>> should not trigger the list corruption. The usage of those helpers should
>> ensure the list operation safely...
> There's nothing the helpers can do about being passed in pointers to
> stack memory. It's a bug in the code using the helpers.

Actually I'm not 100% sure of that. To me it looks like we hit a corner 
case list_cut_position doesn't support.

Or we indeed had a logic error in how we called it, anyway the explicit 
implementation only uses 6 assignments and so is much easier to handle.

Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail
       [not found]             ` <4bf23786-ffdb-ee6e-1446-9dbc8d8ed5d6-otUistvHUpPR7s880joybQ@public.gmane.org>
  2018-09-14 10:46               ` Christian König
@ 2018-09-17  9:40               ` Huang Rui
  1 sibling, 0 replies; 14+ messages in thread
From: Huang Rui @ 2018-09-17  9:40 UTC (permalink / raw)
  To: Michel Dänzer
  Cc: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Fri, Sep 14, 2018 at 05:22:16PM +0800, Michel Dänzer wrote:
> On 2018-09-14 10:22 a.m., Huang Rui wrote:
> > On Thu, Sep 13, 2018 at 07:32:24PM +0800, Christian König wrote:
> >> Am 13.09.2018 um 10:31 schrieb Huang Rui:
> >>> On Wed, Sep 12, 2018 at 09:23:55PM +0200, Christian König wrote:
> >>>> While cutting the lists we sometimes accidentally added a list_head from
> >>>> the stack to the LRUs, effectively corrupting the list.
> >>>>
> >>>> Remove the list cutting and use explicit list manipulation instead.
> >>> This patch actually fixes the corruption bug. Was it a defect of
> >>> list_cut_position or list_splice handlers?
> >>
> >> We somehow did something illegal with list_cut_position. I haven't 
> >> narrowed it down till the end, but we ended up with list_heads from the 
> >> stack to the lru.
> > 
> > I am confused, in theory, even we do any manipulation with list helper, it
> > should not trigger the list corruption. The usage of those helpers should
> > ensure the list operation safely...
> 
> There's nothing the helpers can do about being passed in pointers to
> stack memory. It's a bug in the code using the helpers.
> 

Actually, I was checking carefully with list cut and splice in our case,
and it didn't find any illegal use. However, I also agree with the explicit
list manipulation is more clear and simple for now.

Thanks,
Ray
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2018-09-17  9:40 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-12 19:23 [PATCH] drm/ttm: once more fix ttm_bo_bulk_move_lru_tail Christian König
2018-09-13  8:35 ` Michel Dänzer
2018-09-13  8:55   ` Christian König
2018-09-13  9:00     ` Michel Dänzer
     [not found] ` <20180912192355.2359-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-12 19:42   ` Alex Deucher
2018-09-13  8:31   ` Huang Rui
2018-09-13 11:32     ` Christian König
     [not found]       ` <c2471ef9-1bc7-517e-27f6-a00236a9b40b-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-09-14  8:22         ` Huang Rui
2018-09-14  9:22           ` Michel Dänzer
     [not found]             ` <4bf23786-ffdb-ee6e-1446-9dbc8d8ed5d6-otUistvHUpPR7s880joybQ@public.gmane.org>
2018-09-14 10:46               ` Christian König
2018-09-17  9:40               ` Huang Rui
2018-09-13  8:35   ` Michel Dänzer
2018-09-13 14:25   ` Alex Deucher
     [not found]     ` <CADnq5_PiVfsBjGxN0wsbHL0o5rWYRuVehTeZvN396kp=GVZTNA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2018-09-13 17:29       ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.