All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
@ 2018-08-31 13:10 Christian König
       [not found] ` <20180831131019.2486-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2018-08-31 13:10 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: michel.daenzer-5C7GfCeVMHo

Staring at the function for six hours, just to essentially move one line
of code.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 35d53d81f486..138c98902033 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
 static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
 				    struct list_head *lru, bool is_swap)
 {
+	struct list_head *list;
 	LIST_HEAD(entries);
 	LIST_HEAD(before);
-	struct list_head *list1, *list2;
 
-	list1 = is_swap ? &pos->last->swap : &pos->last->lru;
-	list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
+	reservation_object_assert_held(pos->last->resv);
+	list = is_swap ? &pos->last->swap : &pos->last->lru;
+	list_cut_position(&entries, lru, list);
+
+	reservation_object_assert_held(pos->first->resv);
+	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
+	list_cut_position(&before, &entries, list);
 
-	list_cut_position(&entries, lru, list1);
-	list_cut_position(&before, &entries, list2);
 	list_splice(&before, lru);
 	list_splice_tail(&entries, lru);
 }
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/3] drm/amdgpu: fix "use bulk moves for efficient VM LRU handling" v2
       [not found] ` <20180831131019.2486-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-08-31 13:10   ` Christian König
       [not found]     ` <20180831131019.2486-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-08-31 13:10   ` [PATCH 3/3] drm/amdgpu: fix idle state and bulk_moveavle flag Christian König
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2018-08-31 13:10 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: michel.daenzer-5C7GfCeVMHo

First step to fix the LRU corruption, we accidentially tried to move things
on the LRU after dropping the lock.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dd734970e167..349dcc37ee64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1237,6 +1237,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	ring = to_amdgpu_ring(entity->rq->sched);
 	amdgpu_ring_priority_get(ring, priority);
 
+	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
+
 	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
@@ -1258,7 +1260,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	union drm_amdgpu_cs *cs = data;
 	struct amdgpu_cs_parser parser = {};
 	bool reserved_buffers = false;
-	struct amdgpu_fpriv *fpriv;
 	int i, r;
 
 	if (!adev->accel_working)
@@ -1303,8 +1304,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	r = amdgpu_cs_submit(&parser, cs);
 
-	fpriv = filp->driver_priv;
-	amdgpu_vm_move_to_lru_tail(adev, &fpriv->vm);
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
 	return r;
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/3] drm/amdgpu: fix idle state and bulk_moveavle flag
       [not found] ` <20180831131019.2486-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-08-31 13:10   ` [PATCH 2/3] drm/amdgpu: fix "use bulk moves for efficient VM LRU handling" v2 Christian König
@ 2018-08-31 13:10   ` Christian König
       [not found]     ` <20180831131019.2486-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-08-31 15:15   ` [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper Michel Dänzer
  2018-09-03  2:05   ` Zhang, Jerry (Junwei)
  3 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2018-08-31 13:10 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: michel.daenzer-5C7GfCeVMHo

Add BOs to the idle state again and correctly clear the flag when
new BOs are added.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f31fa351caba..d59222fb5931 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -156,12 +156,15 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 		return;
 	list_add_tail(&base->bo_list, &bo->va);
 
-	if (bo->tbo.type == ttm_bo_type_kernel)
-		list_move(&base->vm_status, &vm->relocated);
-
 	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
 		return;
 
+	vm->bulk_moveable = false;
+	if (bo->tbo.type == ttm_bo_type_kernel)
+		list_move(&base->vm_status, &vm->relocated);
+	else
+		list_move(&base->vm_status, &vm->idle);
+
 	if (bo->preferred_domains &
 	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
 		return;
@@ -1121,7 +1124,7 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 					   struct amdgpu_vm_bo_base,
 					   vm_status);
 		bo_base->moved = false;
-		list_del_init(&bo_base->vm_status);
+		list_move(&bo_base->vm_status, &vm->idle);
 
 		bo = bo_base->bo->parent;
 		if (!bo)
@@ -2646,7 +2649,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		return r;
 
 	vm->pte_support_ats = false;
-	vm->bulk_moveable = true;
 
 	if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
 		vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
-- 
2.14.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
       [not found] ` <20180831131019.2486-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
  2018-08-31 13:10   ` [PATCH 2/3] drm/amdgpu: fix "use bulk moves for efficient VM LRU handling" v2 Christian König
  2018-08-31 13:10   ` [PATCH 3/3] drm/amdgpu: fix idle state and bulk_moveavle flag Christian König
@ 2018-08-31 15:15   ` Michel Dänzer
       [not found]     ` <a186be44-e64b-f408-fa30-bb81b59322f6-otUistvHUpPR7s880joybQ@public.gmane.org>
  2018-09-03  2:05   ` Zhang, Jerry (Junwei)
  3 siblings, 1 reply; 14+ messages in thread
From: Michel Dänzer @ 2018-08-31 15:15 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2018-08-31 3:10 p.m., Christian König wrote:
> Staring at the function for six hours, just to essentially move one line
> of code.

That sucks, but the commit log should describe what the problem was and
how this patch solves it.


> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
>  1 file changed, 8 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 35d53d81f486..138c98902033 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>  static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>  				    struct list_head *lru, bool is_swap)
>  {
> +	struct list_head *list;
>  	LIST_HEAD(entries);
>  	LIST_HEAD(before);
> -	struct list_head *list1, *list2;
>  
> -	list1 = is_swap ? &pos->last->swap : &pos->last->lru;
> -	list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> +	reservation_object_assert_held(pos->last->resv);
> +	list = is_swap ? &pos->last->swap : &pos->last->lru;
> +	list_cut_position(&entries, lru, list);
> +
> +	reservation_object_assert_held(pos->first->resv);
> +	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> +	list_cut_position(&before, &entries, list);

So the problem was that the first list_cut_position call could result in
list2 pointing to la-la-land? Good catch!


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
       [not found]     ` <a186be44-e64b-f408-fa30-bb81b59322f6-otUistvHUpPR7s880joybQ@public.gmane.org>
@ 2018-08-31 15:17       ` Christian König
       [not found]         ` <c8d0d5c6-1221-26be-4005-2f7ae21ade80-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2018-08-31 15:17 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 31.08.2018 um 17:15 schrieb Michel Dänzer:
> On 2018-08-31 3:10 p.m., Christian König wrote:
>> Staring at the function for six hours, just to essentially move one line
>> of code.
> That sucks, but the commit log should describe what the problem was and
> how this patch solves it.
>
>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
>>   1 file changed, 8 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>> index 35d53d81f486..138c98902033 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>> @@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>>   static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>>   				    struct list_head *lru, bool is_swap)
>>   {
>> +	struct list_head *list;
>>   	LIST_HEAD(entries);
>>   	LIST_HEAD(before);
>> -	struct list_head *list1, *list2;
>>   
>> -	list1 = is_swap ? &pos->last->swap : &pos->last->lru;
>> -	list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>> +	reservation_object_assert_held(pos->last->resv);
>> +	list = is_swap ? &pos->last->swap : &pos->last->lru;
>> +	list_cut_position(&entries, lru, list);
>> +
>> +	reservation_object_assert_held(pos->first->resv);
>> +	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>> +	list_cut_position(&before, &entries, list);
> So the problem was that the first list_cut_position call could result in
> list2 pointing to la-la-land? Good catch!

Yes, exactly. Thought that would be obvious, but going to add that to 
the commit log.

Can I get a tested-by? You where much better at reproducing that than I'm.

Christian.
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: fix idle state and bulk_moveavle flag
       [not found]     ` <20180831131019.2486-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-08-31 15:19       ` Michel Dänzer
  0 siblings, 0 replies; 14+ messages in thread
From: Michel Dänzer @ 2018-08-31 15:19 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2018-08-31 3:10 p.m., Christian König wrote:
> Add BOs to the idle state again and correctly clear the flag when
> new BOs are added.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Typo in the shortlog: bulk_moveavle -> bulk_moveable


The series is

Tested-by: Michel Dänzer <michel.daenzer@amd.com>

Thanks Christian!


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
       [not found] ` <20180831131019.2486-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2018-08-31 15:15   ` [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper Michel Dänzer
@ 2018-09-03  2:05   ` Zhang, Jerry (Junwei)
  3 siblings, 0 replies; 14+ messages in thread
From: Zhang, Jerry (Junwei) @ 2018-09-03  2:05 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: michel.daenzer-5C7GfCeVMHo

On 08/31/2018 09:10 PM, Christian König wrote:
> Staring at the function for six hours, just to essentially move one line
> of code.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

> ---
>   drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
>   1 file changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 35d53d81f486..138c98902033 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>   static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>   				    struct list_head *lru, bool is_swap)
>   {
> +	struct list_head *list;
>   	LIST_HEAD(entries);
>   	LIST_HEAD(before);
> -	struct list_head *list1, *list2;
>
> -	list1 = is_swap ? &pos->last->swap : &pos->last->lru;
> -	list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> +	reservation_object_assert_held(pos->last->resv);
> +	list = is_swap ? &pos->last->swap : &pos->last->lru;
> +	list_cut_position(&entries, lru, list);
> +
> +	reservation_object_assert_held(pos->first->resv);
> +	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> +	list_cut_position(&before, &entries, list);
>
> -	list_cut_position(&entries, lru, list1);
> -	list_cut_position(&before, &entries, list2);
>   	list_splice(&before, lru);
>   	list_splice_tail(&entries, lru);
>   }
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] drm/amdgpu: fix "use bulk moves for efficient VM LRU handling" v2
       [not found]     ` <20180831131019.2486-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-03  2:05       ` Zhang, Jerry (Junwei)
  0 siblings, 0 replies; 14+ messages in thread
From: Zhang, Jerry (Junwei) @ 2018-09-03  2:05 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: michel.daenzer-5C7GfCeVMHo

On 08/31/2018 09:10 PM, Christian König wrote:
> First step to fix the LRU corruption, we accidentially tried to move things
> on the LRU after dropping the lock.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 5 ++---
>   1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index dd734970e167..349dcc37ee64 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1237,6 +1237,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   	ring = to_amdgpu_ring(entity->rq->sched);
>   	amdgpu_ring_priority_get(ring, priority);
>
> +	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
> +
>   	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>   	amdgpu_mn_unlock(p->mn);
>
> @@ -1258,7 +1260,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	union drm_amdgpu_cs *cs = data;
>   	struct amdgpu_cs_parser parser = {};
>   	bool reserved_buffers = false;
> -	struct amdgpu_fpriv *fpriv;
>   	int i, r;
>
>   	if (!adev->accel_working)
> @@ -1303,8 +1304,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>
>   	r = amdgpu_cs_submit(&parser, cs);
>
> -	fpriv = filp->driver_priv;
> -	amdgpu_vm_move_to_lru_tail(adev, &fpriv->vm);
>   out:
>   	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>   	return r;
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
       [not found]         ` <c8d0d5c6-1221-26be-4005-2f7ae21ade80-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-09-06 10:02           ` Huang Rui
  2018-09-06 10:06             ` Christian König
  0 siblings, 1 reply; 14+ messages in thread
From: Huang Rui @ 2018-09-06 10:02 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo
  Cc: Michel Dänzer, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Fri, Aug 31, 2018 at 05:17:33PM +0200, Christian König wrote:
> Am 31.08.2018 um 17:15 schrieb Michel Dänzer:
> >On 2018-08-31 3:10 p.m., Christian König wrote:
> >>Staring at the function for six hours, just to essentially move one line
> >>of code.
> >That sucks, but the commit log should describe what the problem was and
> >how this patch solves it.
> >
> >
> >>Signed-off-by: Christian König <christian.koenig@amd.com>
> >>---
> >>  drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
> >>  1 file changed, 8 insertions(+), 5 deletions(-)
> >>
> >>diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> >>index 35d53d81f486..138c98902033 100644
> >>--- a/drivers/gpu/drm/ttm/ttm_bo.c
> >>+++ b/drivers/gpu/drm/ttm/ttm_bo.c
> >>@@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
> >>  static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
> >>  				    struct list_head *lru, bool is_swap)
> >>  {
> >>+	struct list_head *list;
> >>  	LIST_HEAD(entries);
> >>  	LIST_HEAD(before);
> >>-	struct list_head *list1, *list2;
> >>-	list1 = is_swap ? &pos->last->swap : &pos->last->lru;
> >>-	list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> >>+	reservation_object_assert_held(pos->last->resv);
> >>+	list = is_swap ? &pos->last->swap : &pos->last->lru;
> >>+	list_cut_position(&entries, lru, list);
> >>+
> >>+	reservation_object_assert_held(pos->first->resv);
> >>+	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> >>+	list_cut_position(&before, &entries, list);
> >So the problem was that the first list_cut_position call could result in
> >list2 pointing to la-la-land? Good catch!
> 
> Yes, exactly. Thought that would be obvious, but going to add that
> to the commit log.
> 
> Can I get a tested-by? You where much better at reproducing that than I'm.
> 

Michel, Christian, thanks so much to take care of this when I was on
vacation. And sorry to let you take a long time for finding the cause.

Is that because I didn't hold the resveration before cut the list from
position "first" and "last"? May I know in which cases, we must hold the
bo's reservation firstly?

Thanks,
Ray


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
  2018-09-06 10:02           ` Huang Rui
@ 2018-09-06 10:06             ` Christian König
       [not found]               ` <3be7e67a-2ffe-21c0-2160-69a330413858-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2018-09-06 10:06 UTC (permalink / raw)
  To: Huang Rui; +Cc: Michel Dänzer, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 06.09.2018 um 12:02 schrieb Huang Rui:
> On Fri, Aug 31, 2018 at 05:17:33PM +0200, Christian König wrote:
>> Am 31.08.2018 um 17:15 schrieb Michel Dänzer:
>>> On 2018-08-31 3:10 p.m., Christian König wrote:
>>>> Staring at the function for six hours, just to essentially move one line
>>>> of code.
>>> That sucks, but the commit log should describe what the problem was and
>>> how this patch solves it.
>>>
>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
>>>>   1 file changed, 8 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index 35d53d81f486..138c98902033 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>>>>   static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>>>>   				    struct list_head *lru, bool is_swap)
>>>>   {
>>>> +	struct list_head *list;
>>>>   	LIST_HEAD(entries);
>>>>   	LIST_HEAD(before);
>>>> -	struct list_head *list1, *list2;
>>>> -	list1 = is_swap ? &pos->last->swap : &pos->last->lru;
>>>> -	list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>>> +	reservation_object_assert_held(pos->last->resv);
>>>> +	list = is_swap ? &pos->last->swap : &pos->last->lru;
>>>> +	list_cut_position(&entries, lru, list);
>>>> +
>>>> +	reservation_object_assert_held(pos->first->resv);
>>>> +	list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>>> +	list_cut_position(&before, &entries, list);
>>> So the problem was that the first list_cut_position call could result in
>>> list2 pointing to la-la-land? Good catch!
>> Yes, exactly. Thought that would be obvious, but going to add that
>> to the commit log.
>>
>> Can I get a tested-by? You where much better at reproducing that than I'm.
>>
> Michel, Christian, thanks so much to take care of this when I was on
> vacation. And sorry to let you take a long time for finding the cause.
>
> Is that because I didn't hold the resveration before cut the list from
> position "first" and "last"?

Yes, that was one problem. Another was that the cutting code was buggy 
and determined one of the positions to cut at the wrong time.

>   May I know in which cases, we must hold the
> bo's reservation firstly?

BOs are reserved to prevent moving them. E.g. when the BO isn't reserved 
it can move around and so the LRU where you want to add/remove it could 
change.

Christian.

>
> Thanks,
> Ray
>
>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
       [not found]               ` <3be7e67a-2ffe-21c0-2160-69a330413858-5C7GfCeVMHo@public.gmane.org>
@ 2018-09-07 11:02                 ` Huang, Ray
       [not found]                   ` <CY1PR12MB0043539F7EE805B3A98E1FE2EC000-1s8aH8ViOEdbUNsZNX5b0gdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Huang, Ray @ 2018-09-07 11:02 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Michel Dänzer, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

[-- Attachment #1: Type: text/plain, Size: 4061 bytes --]


>> Yes, that was one problem. Another was that the cutting code was buggy 
>>  and determined one of the positions to cut at the wrong time.

I went through again about the list cutting behavior and wrote down with the attached picture.
After do the second list_cut_position, the list2 should be point the end of "before" list. And list2 won't be used anymore after list cutting. May I know am I something missed?  

Thanks,
Ray

From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> on behalf of Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
Sent: Thursday, September 6, 2018 6:06 PM
To: Huang, Ray
Cc: Michel Dänzer; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
  

Am 06.09.2018 um 12:02 schrieb Huang Rui:
> On Fri, Aug 31, 2018 at 05:17:33PM +0200, Christian König wrote:
>> Am 31.08.2018 um 17:15 schrieb Michel Dänzer:
>>> On 2018-08-31 3:10 p.m., Christian König wrote:
>>>> Staring at the function for six hours, just to essentially move one line
>>>> of code.
>>> That sucks, but the commit log should describe what the problem was and
>>> how this patch solves it.
>>>
>>>
>>>> Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>>>> ---
>>>>   drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
>>>>   1 file changed, 8 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> index 35d53d81f486..138c98902033 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>> @@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>>>>   static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>>>>                                 struct list_head *lru, bool is_swap)
>>>>   {
>>>> +  struct list_head *list;
>>>>     LIST_HEAD(entries);
>>>>     LIST_HEAD(before);
>>>> -  struct list_head *list1, *list2;
>>>> -  list1 = is_swap ? &pos->last->swap : &pos->last->lru;
>>>> -  list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>>> +  reservation_object_assert_held(pos->last->resv);
>>>> +  list = is_swap ? &pos->last->swap : &pos->last->lru;
>>>> +  list_cut_position(&entries, lru, list);
>>>> +
>>>> +  reservation_object_assert_held(pos->first->resv);
>>>> +  list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>>> +  list_cut_position(&before, &entries, list);
>>> So the problem was that the first list_cut_position call could result in
>>> list2 pointing to la-la-land? Good catch!
>> Yes, exactly. Thought that would be obvious, but going to add that
>> to the commit log.
>>
>> Can I get a tested-by? You where much better at reproducing that than I'm.
>>
> Michel, Christian, thanks so much to take care of this when I was on
> vacation. And sorry to let you take a long time for finding the cause.
>
> Is that because I didn't hold the resveration before cut the list from
> position "first" and "last"?

Yes, that was one problem. Another was that the cutting code was buggy 
and determined one of the positions to cut at the wrong time.

>   May I know in which cases, we must hold the
> bo's reservation firstly?

BOs are reserved to prevent moving them. E.g. when the BO isn't reserved 
it can move around and so the LRU where you want to add/remove it could 
change.

Christian.

>
> Thanks,
> Ray
>
>

_______________________________________________
amd-gfx mailing list
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


amd-gfx Info Page - freedesktop.org
lists.freedesktop.org
To see the collection of prior postings to the list, visit the amd-gfx Archives.. Using amd-gfx: To post a message to all the list members, send email to amd-gfx-PD4FTy7X32lNgt0PjOBp9xlNPtJONSTn@public.gmane.org You can subscribe to the list, or change your existing subscription, in the sections below.

    

[-- Attachment #2: list_cutting.jpg --]
[-- Type: image/jpeg, Size: 107697 bytes --]

[-- Attachment #3: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
       [not found]                   ` <CY1PR12MB0043539F7EE805B3A98E1FE2EC000-1s8aH8ViOEdbUNsZNX5b0gdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-09-07 11:18                     ` Christian König
       [not found]                       ` <24dbe7c4-63df-b8a7-28c3-df0e9866841f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2018-09-07 11:18 UTC (permalink / raw)
  To: Huang, Ray, Koenig, Christian
  Cc: Michel Dänzer, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 4793 bytes --]

Am 07.09.2018 um 13:02 schrieb Huang, Ray:
>>>   Yes, that was one problem. Another was that the cutting code was buggy
>>>    and determined one of the positions to cut at the wrong time.
> I went through again about the list cutting behavior and wrote down with the attached picture.
> After do the second list_cut_position, the list2 should be point the end of "before" list. And list2 won't be used anymore after list cutting. May I know am I something missed?

Let's take a look at the original code:
> list1 = is_swap ? &pos->last->swap : &pos->last->lru;
> list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>
> list_cut_position(&entries, lru, list1);
> list_cut_position(&before, &entries, list2);

As far as I can see the problem is that the first list_cur_position 
could modify the value of pos->first->lru.prev and so make the second 
list_cut_position work on the wrong position.

Regards,
Christian.

>
> Thanks,
> Ray
>
> From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> on behalf of Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
> Sent: Thursday, September 6, 2018 6:06 PM
> To: Huang, Ray
> Cc: Michel Dänzer; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> Subject: Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
>    
>
> Am 06.09.2018 um 12:02 schrieb Huang Rui:
>> On Fri, Aug 31, 2018 at 05:17:33PM +0200, Christian König wrote:
>>> Am 31.08.2018 um 17:15 schrieb Michel Dänzer:
>>>> On 2018-08-31 3:10 p.m., Christian König wrote:
>>>>> Staring at the function for six hours, just to essentially move one line
>>>>> of code.
>>>> That sucks, but the commit log should describe what the problem was and
>>>> how this patch solves it.
>>>>
>>>>
>>>>> Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>>>>> ---
>>>>>     drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
>>>>>     1 file changed, 8 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>> index 35d53d81f486..138c98902033 100644
>>>>> --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>>>> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>>>> @@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>>>>>     static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>>>>>                                   struct list_head *lru, bool is_swap)
>>>>>     {
>>>>> +  struct list_head *list;
>>>>>       LIST_HEAD(entries);
>>>>>       LIST_HEAD(before);
>>>>> -  struct list_head *list1, *list2;
>>>>> -  list1 = is_swap ? &pos->last->swap : &pos->last->lru;
>>>>> -  list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>>>> +  reservation_object_assert_held(pos->last->resv);
>>>>> +  list = is_swap ? &pos->last->swap : &pos->last->lru;
>>>>> +  list_cut_position(&entries, lru, list);
>>>>> +
>>>>> +  reservation_object_assert_held(pos->first->resv);
>>>>> +  list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>>>> +  list_cut_position(&before, &entries, list);
>>>> So the problem was that the first list_cut_position call could result in
>>>> list2 pointing to la-la-land? Good catch!
>>> Yes, exactly. Thought that would be obvious, but going to add that
>>> to the commit log.
>>>
>>> Can I get a tested-by? You where much better at reproducing that than I'm.
>>>
>> Michel, Christian, thanks so much to take care of this when I was on
>> vacation. And sorry to let you take a long time for finding the cause.
>>
>> Is that because I didn't hold the resveration before cut the list from
>> position "first" and "last"?
> Yes, that was one problem. Another was that the cutting code was buggy
> and determined one of the positions to cut at the wrong time.
>
>>     May I know in which cases, we must hold the
>> bo's reservation firstly?
> BOs are reserved to prevent moving them. E.g. when the BO isn't reserved
> it can move around and so the LRU where you want to add/remove it could
> change.
>
> Christian.
>
>> Thanks,
>> Ray
>>
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
> amd-gfx Info Page - freedesktop.org
> lists.freedesktop.org
> To see the collection of prior postings to the list, visit the amd-gfx Archives.. Using amd-gfx: To post a message to all the list members, send email to amd-gfx-PD4FTy7X32lNgt0PjOBp9xlNPtJONSTn@public.gmane.org You can subscribe to the list, or change your existing subscription, in the sections below.
>
>      
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[-- Attachment #1.2: Type: text/html, Size: 7434 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
       [not found]                       ` <24dbe7c4-63df-b8a7-28c3-df0e9866841f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2018-09-08  7:54                         ` Huang Rui
  2018-09-08 10:43                           ` Christian König
  0 siblings, 1 reply; 14+ messages in thread
From: Huang Rui @ 2018-09-08  7:54 UTC (permalink / raw)
  To: Koenig, Christian
  Cc: Michel D�nzer, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

[-- Attachment #1: Type: text/plain, Size: 6406 bytes --]

On Fri, Sep 07, 2018 at 07:18:59PM +0800, Christian K�nig wrote:
> Am 07.09.2018 um 13:02 schrieb Huang, Ray:
> 
>              Yes, that was one problem. Another was that the cutting code was buggy 
>               and determined one of the positions to cut at the wrong time.
> 
>     I went through again about the list cutting behavior and wrote down with the attached picture.
>     After do the second list_cut_position, the list2 should be point the end of "before" list. And list2 won't be used anymore after list cutting. May I know am I something missed? 
> 
> 
> Let's take a look at the original code:
> 
>     list1 = is_swap ? &pos->last->swap : &pos->last->lru;
>     list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>      
>     list_cut_position(&entries, lru, list1);
>     list_cut_position(&before, &entries, list2);
> 
> 
> As far as I can see the problem is that the first list_cur_position could
> modify the value of pos->first->lru.prev and so make the second
> list_cut_position work on the wrong position.
> 

I think I understood. In this case (the first element of LRU ==
pos->first->lru), please see the picture. If we store first->lru.prev as
list2(LRU head), after do list cutting, the first->lru.prev will overwrite
as new head (entries), however, the orignal list2 will still point previous
head (that is the wrong position now). We actually expected to use the
latest first->lru.prev as the second cutting position. So we should adjust
code sequence like below:

list1 = is_swap ? &pos->last->swap : &pos->last->lru;
list_cut_position(&entries, lru, list1);

list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
list_cut_position(&before, &entries, list2);

Am I understanding right?

Thanks,
Ray

> Regards,
> Christian.
> 
> 
> 
>     Thanks,
>     Ray
> 
>     From: amd-gfx <amd-gfx-bounces-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org> on behalf of Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>     Sent: Thursday, September 6, 2018 6:06 PM
>     To: Huang, Ray
>     Cc: Michel Dänzer; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>     Subject: Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
>      
> 
>     Am 06.09.2018 um 12:02 schrieb Huang Rui:
> 
>         On Fri, Aug 31, 2018 at 05:17:33PM +0200, Christian König wrote:
> 
>             Am 31.08.2018 um 17:15 schrieb Michel Dänzer:
> 
>                 On 2018-08-31 3:10 p.m., Christian König wrote:
> 
>                     Staring at the function for six hours, just to essentially move one line
>                     of code.
> 
>                 That sucks, but the commit log should describe what the problem was and
>                 how this patch solves it.
> 
> 
> 
>                     Signed-off-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>                     ---
>                        drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
>                        1 file changed, 8 insertions(+), 5 deletions(-)
> 
>                     diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>                     index 35d53d81f486..138c98902033 100644
>                     --- a/drivers/gpu/drm/ttm/ttm_bo.c
>                     +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>                     @@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>                        static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>                                                      struct list_head *lru, bool is_swap)
>                        {
>                     +  struct list_head *list;
>                          LIST_HEAD(entries);
>                          LIST_HEAD(before);
>                     -  struct list_head *list1, *list2;
>                     -  list1 = is_swap ? &pos->last->swap : &pos->last->lru;
>                     -  list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>                     +  reservation_object_assert_held(pos->last->resv);
>                     +  list = is_swap ? &pos->last->swap : &pos->last->lru;
>                     +  list_cut_position(&entries, lru, list);
>                     +
>                     +  reservation_object_assert_held(pos->first->resv);
>                     +  list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>                     +  list_cut_position(&before, &entries, list);
> 
>                 So the problem was that the first list_cut_position call could result in
>                 list2 pointing to la-la-land? Good catch!
> 
>             Yes, exactly. Thought that would be obvious, but going to add that
>             to the commit log.
> 
>             Can I get a tested-by? You where much better at reproducing that than I'm.
> 
> 
>         Michel, Christian, thanks so much to take care of this when I was on
>         vacation. And sorry to let you take a long time for finding the cause.
> 
>         Is that because I didn't hold the resveration before cut the list from
>         position "first" and "last"?
> 
>     Yes, that was one problem. Another was that the cutting code was buggy
>     and determined one of the positions to cut at the wrong time.
> 
> 
>            May I know in which cases, we must hold the
>         bo's reservation firstly?
> 
>     BOs are reserved to prevent moving them. E.g. when the BO isn't reserved
>     it can move around and so the LRU where you want to add/remove it could
>     change.
> 
>     Christian.
> 
> 
>         Thanks,
>         Ray
> 
> 
> 
>     _______________________________________________
>     amd-gfx mailing list
>     amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>     https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
> 
>     amd-gfx Info Page - freedesktop.org
>     lists.freedesktop.org
>     To see the collection of prior postings to the list, visit the amd-gfx Archives.. Using amd-gfx: To post a message to all the list members, send email to amd-gfx-PD4FTy7X32lNgt0PjOBp9xlNPtJONSTn@public.gmane.org You can subscribe to the list, or change your existing subscription, in the sections below.
> 
> 
> 
> 
>    
>     _______________________________________________
>     amd-gfx mailing list
>     amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>     https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
> 

[-- Attachment #2: Wrong_case.jpg --]
[-- Type: image/jpeg, Size: 70757 bytes --]

[-- Attachment #3: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
  2018-09-08  7:54                         ` Huang Rui
@ 2018-09-08 10:43                           ` Christian König
  0 siblings, 0 replies; 14+ messages in thread
From: Christian König @ 2018-09-08 10:43 UTC (permalink / raw)
  To: Huang Rui; +Cc: Michel D�nzer, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 08.09.2018 um 09:54 schrieb Huang Rui:
> On Fri, Sep 07, 2018 at 07:18:59PM +0800, Christian K�nig wrote:
>> Am 07.09.2018 um 13:02 schrieb Huang, Ray:
>>
>>               Yes, that was one problem. Another was that the cutting code was buggy
>>                and determined one of the positions to cut at the wrong time.
>>
>>      I went through again about the list cutting behavior and wrote down with the attached picture.
>>      After do the second list_cut_position, the list2 should be point the end of "before" list. And list2 won't be used anymore after list cutting. May I know am I something missed?
>>
>>
>> Let's take a look at the original code:
>>
>>      list1 = is_swap ? &pos->last->swap : &pos->last->lru;
>>      list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>       
>>      list_cut_position(&entries, lru, list1);
>>      list_cut_position(&before, &entries, list2);
>>
>>
>> As far as I can see the problem is that the first list_cur_position could
>> modify the value of pos->first->lru.prev and so make the second
>> list_cut_position work on the wrong position.
>>
> I think I understood. In this case (the first element of LRU ==
> pos->first->lru), please see the picture. If we store first->lru.prev as
> list2(LRU head), after do list cutting, the first->lru.prev will overwrite
> as new head (entries), however, the orignal list2 will still point previous
> head (that is the wrong position now). We actually expected to use the
> latest first->lru.prev as the second cutting position. So we should adjust
> code sequence like below:
>
> list1 = is_swap ? &pos->last->swap : &pos->last->lru;
> list_cut_position(&entries, lru, list1);
>
> list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
> list_cut_position(&before, &entries, list2);
>
> Am I understanding right?

Yes, exactly. And the fix looks like what I did as well. Just only used 
one variable and added some extra BUG_ON().

Christian.

>
> Thanks,
> Ray
>
>> Regards,
>> Christian.
>>
>>
>>
>>      Thanks,
>>      Ray
>>
>>      From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> on behalf of Christian König <christian.koenig@amd.com>
>>      Sent: Thursday, September 6, 2018 6:06 PM
>>      To: Huang, Ray
>>      Cc: Michel Dänzer; amd-gfx@lists.freedesktop.org
>>      Subject: Re: [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper
>>       
>>
>>      Am 06.09.2018 um 12:02 schrieb Huang Rui:
>>
>>          On Fri, Aug 31, 2018 at 05:17:33PM +0200, Christian König wrote:
>>
>>              Am 31.08.2018 um 17:15 schrieb Michel Dänzer:
>>
>>                  On 2018-08-31 3:10 p.m., Christian König wrote:
>>
>>                      Staring at the function for six hours, just to essentially move one line
>>                      of code.
>>
>>                  That sucks, but the commit log should describe what the problem was and
>>                  how this patch solves it.
>>
>>
>>
>>                      Signed-off-by: Christian König <christian.koenig@amd.com>
>>                      ---
>>                         drivers/gpu/drm/ttm/ttm_bo.c | 13 ++++++++-----
>>                         1 file changed, 8 insertions(+), 5 deletions(-)
>>
>>                      diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
>>                      index 35d53d81f486..138c98902033 100644
>>                      --- a/drivers/gpu/drm/ttm/ttm_bo.c
>>                      +++ b/drivers/gpu/drm/ttm/ttm_bo.c
>>                      @@ -250,15 +250,18 @@ EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
>>                         static void ttm_bo_bulk_move_helper(struct ttm_lru_bulk_move_pos *pos,
>>                                                       struct list_head *lru, bool is_swap)
>>                         {
>>                      +  struct list_head *list;
>>                           LIST_HEAD(entries);
>>                           LIST_HEAD(before);
>>                      -  struct list_head *list1, *list2;
>>                      -  list1 = is_swap ? &pos->last->swap : &pos->last->lru;
>>                      -  list2 = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>                      +  reservation_object_assert_held(pos->last->resv);
>>                      +  list = is_swap ? &pos->last->swap : &pos->last->lru;
>>                      +  list_cut_position(&entries, lru, list);
>>                      +
>>                      +  reservation_object_assert_held(pos->first->resv);
>>                      +  list = is_swap ? pos->first->swap.prev : pos->first->lru.prev;
>>                      +  list_cut_position(&before, &entries, list);
>>
>>                  So the problem was that the first list_cut_position call could result in
>>                  list2 pointing to la-la-land? Good catch!
>>
>>              Yes, exactly. Thought that would be obvious, but going to add that
>>              to the commit log.
>>
>>              Can I get a tested-by? You where much better at reproducing that than I'm.
>>
>>
>>          Michel, Christian, thanks so much to take care of this when I was on
>>          vacation. And sorry to let you take a long time for finding the cause.
>>
>>          Is that because I didn't hold the resveration before cut the list from
>>          position "first" and "last"?
>>
>>      Yes, that was one problem. Another was that the cutting code was buggy
>>      and determined one of the positions to cut at the wrong time.
>>
>>
>>             May I know in which cases, we must hold the
>>          bo's reservation firstly?
>>
>>      BOs are reserved to prevent moving them. E.g. when the BO isn't reserved
>>      it can move around and so the LRU where you want to add/remove it could
>>      change.
>>
>>      Christian.
>>
>>
>>          Thanks,
>>          Ray
>>
>>
>>
>>      _______________________________________________
>>      amd-gfx mailing list
>>      amd-gfx@lists.freedesktop.org
>>      https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>
>>      amd-gfx Info Page - freedesktop.org
>>      lists.freedesktop.org
>>      To see the collection of prior postings to the list, visit the amd-gfx Archives.. Using amd-gfx: To post a message to all the list members, send email to amd-gfx@lists.freedesktop.org. You can subscribe to the list, or change your existing subscription, in the sections below.
>>
>>
>>
>>
>>     
>>      _______________________________________________
>>      amd-gfx mailing list
>>      amd-gfx@lists.freedesktop.org
>>      https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2018-09-08 10:43 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-08-31 13:10 [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper Christian König
     [not found] ` <20180831131019.2486-1-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-08-31 13:10   ` [PATCH 2/3] drm/amdgpu: fix "use bulk moves for efficient VM LRU handling" v2 Christian König
     [not found]     ` <20180831131019.2486-2-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-09-03  2:05       ` Zhang, Jerry (Junwei)
2018-08-31 13:10   ` [PATCH 3/3] drm/amdgpu: fix idle state and bulk_moveavle flag Christian König
     [not found]     ` <20180831131019.2486-3-christian.koenig-5C7GfCeVMHo@public.gmane.org>
2018-08-31 15:19       ` Michel Dänzer
2018-08-31 15:15   ` [PATCH 1/3] drm/ttm: fix ttm_bo_bulk_move_helper Michel Dänzer
     [not found]     ` <a186be44-e64b-f408-fa30-bb81b59322f6-otUistvHUpPR7s880joybQ@public.gmane.org>
2018-08-31 15:17       ` Christian König
     [not found]         ` <c8d0d5c6-1221-26be-4005-2f7ae21ade80-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-09-06 10:02           ` Huang Rui
2018-09-06 10:06             ` Christian König
     [not found]               ` <3be7e67a-2ffe-21c0-2160-69a330413858-5C7GfCeVMHo@public.gmane.org>
2018-09-07 11:02                 ` Huang, Ray
     [not found]                   ` <CY1PR12MB0043539F7EE805B3A98E1FE2EC000-1s8aH8ViOEdbUNsZNX5b0gdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-09-07 11:18                     ` Christian König
     [not found]                       ` <24dbe7c4-63df-b8a7-28c3-df0e9866841f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2018-09-08  7:54                         ` Huang Rui
2018-09-08 10:43                           ` Christian König
2018-09-03  2:05   ` Zhang, Jerry (Junwei)

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.