All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: stop allocating PDs/PTs with the eviction lock held
@ 2020-02-12 15:14 Christian König
  2020-02-14 21:12 ` Felix Kuehling
  0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2020-02-12 15:14 UTC (permalink / raw)
  To: alex.sierra, Felix.Kuehling, amd-gfx

We need to make sure to not allocate PDs/PTs while holding
the eviction lock or otherwise we will run into lock inversion
in the MM as soon as we enable the MMU notifier.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 77c400675b79..e7ab0c1e2793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -897,27 +897,42 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	struct amdgpu_vm_pt *entry = cursor->entry;
 	struct amdgpu_bo_param bp;
 	struct amdgpu_bo *pt;
+	bool need_entries;
 	int r;
 
-	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
+	need_entries = cursor->level < AMDGPU_VM_PTB && !entry->entries;
+	if (!need_entries && entry->base.bo)
+		return 0;
+
+	/* We need to make sure that we don't allocate PDs/PTs while holding the
+	 * eviction lock or we run into lock recursion in the MM.
+	 */
+	amdgpu_vm_eviction_unlock(vm);
+
+	if (need_entries) {
 		unsigned num_entries;
 
 		num_entries = amdgpu_vm_num_entries(adev, cursor->level);
 		entry->entries = kvmalloc_array(num_entries,
 						sizeof(*entry->entries),
 						GFP_KERNEL | __GFP_ZERO);
-		if (!entry->entries)
-			return -ENOMEM;
+		if (!entry->entries) {
+			r = -ENOMEM;
+			goto error_lock;
+		}
 	}
 
-	if (entry->base.bo)
-		return 0;
+	if (entry->base.bo) {
+		r = 0;
+		goto error_lock;
+	}
 
 	amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
 
 	r = amdgpu_bo_create(adev, &bp, &pt);
+	amdgpu_vm_eviction_lock(vm);
 	if (r)
-		return r;
+		goto error_free_pt;
 
 	/* Keep a reference to the root directory to avoid
 	 * freeing them up in the wrong order.
@@ -936,6 +951,10 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	amdgpu_bo_unref(&pt);
 	entry->base.bo = NULL;
 	return r;
+
+error_lock:
+	amdgpu_vm_eviction_lock(vm);
+	return r;
 }
 
 /**
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdgpu: stop allocating PDs/PTs with the eviction lock held
  2020-02-12 15:14 [PATCH] drm/amdgpu: stop allocating PDs/PTs with the eviction lock held Christian König
@ 2020-02-14 21:12 ` Felix Kuehling
  2020-02-27 15:28   ` Christian König
  0 siblings, 1 reply; 4+ messages in thread
From: Felix Kuehling @ 2020-02-14 21:12 UTC (permalink / raw)
  To: Christian König, alex.sierra, amd-gfx

Now you allow eviction of page tables while you allocate page tables. 
Isn't the whole point of the eviction lock to prevent page table 
evictions while manipulating page tables?

Or does this only apply to PTE invalidations which never allocated 
memory? Is that the only case that doesn't reserve page tables?

Regards,
   Felix

On 2020-02-12 10:14 a.m., Christian König wrote:
> We need to make sure to not allocate PDs/PTs while holding
> the eviction lock or otherwise we will run into lock inversion
> in the MM as soon as we enable the MMU notifier.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +++++++++++++++++++++-----
>   1 file changed, 25 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 77c400675b79..e7ab0c1e2793 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -897,27 +897,42 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>   	struct amdgpu_vm_pt *entry = cursor->entry;
>   	struct amdgpu_bo_param bp;
>   	struct amdgpu_bo *pt;
> +	bool need_entries;
>   	int r;
>   
> -	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
> +	need_entries = cursor->level < AMDGPU_VM_PTB && !entry->entries;
> +	if (!need_entries && entry->base.bo)
> +		return 0;
> +
> +	/* We need to make sure that we don't allocate PDs/PTs while holding the
> +	 * eviction lock or we run into lock recursion in the MM.
> +	 */
> +	amdgpu_vm_eviction_unlock(vm);
> +
> +	if (need_entries) {
>   		unsigned num_entries;
>   
>   		num_entries = amdgpu_vm_num_entries(adev, cursor->level);
>   		entry->entries = kvmalloc_array(num_entries,
>   						sizeof(*entry->entries),
>   						GFP_KERNEL | __GFP_ZERO);
> -		if (!entry->entries)
> -			return -ENOMEM;
> +		if (!entry->entries) {
> +			r = -ENOMEM;
> +			goto error_lock;
> +		}
>   	}
>   
> -	if (entry->base.bo)
> -		return 0;
> +	if (entry->base.bo) {
> +		r = 0;
> +		goto error_lock;
> +	}
>   
>   	amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
>   
>   	r = amdgpu_bo_create(adev, &bp, &pt);
> +	amdgpu_vm_eviction_lock(vm);
>   	if (r)
> -		return r;
> +		goto error_free_pt;
>   
>   	/* Keep a reference to the root directory to avoid
>   	 * freeing them up in the wrong order.
> @@ -936,6 +951,10 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
>   	amdgpu_bo_unref(&pt);
>   	entry->base.bo = NULL;
>   	return r;
> +
> +error_lock:
> +	amdgpu_vm_eviction_lock(vm);
> +	return r;
>   }
>   
>   /**
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdgpu: stop allocating PDs/PTs with the eviction lock held
  2020-02-14 21:12 ` Felix Kuehling
@ 2020-02-27 15:28   ` Christian König
  2020-02-27 18:43     ` Felix Kuehling
  0 siblings, 1 reply; 4+ messages in thread
From: Christian König @ 2020-02-27 15:28 UTC (permalink / raw)
  To: Felix Kuehling, alex.sierra, amd-gfx

Hi Felix,

so coming back to this after two weeks of distraction.

Am 14.02.20 um 22:12 schrieb Felix Kuehling:
> Now you allow eviction of page tables while you allocate page tables. 
> Isn't the whole point of the eviction lock to prevent page table 
> evictions while manipulating page tables?
>
> Or does this only apply to PTE invalidations which never allocated 
> memory? Is that the only case that doesn't reserve page tables?

Yes, exactly. We essentially have to distinct two cases here:

1. We are validating PTEs and so eventually need to allocate page tables.
     For this the root PD is reserved and we actually know that the VM 
can't be evicted.
     But we still need to hold the lock while pushing the actual updates 
the hardware to make sure we don't mess up the data structures with 
concurrent invalidations.

2. We are invalidating PTEs.
     Here we might or might not have the root PD reserved and so need to 
make sure that nobody is evicting page tables while we are invalidating.

Quite a complicated dance, but of hand I also don't see much other choice.

Regards,
Christian.

>
> Regards,
>   Felix
>
> On 2020-02-12 10:14 a.m., Christian König wrote:
>> We need to make sure to not allocate PDs/PTs while holding
>> the eviction lock or otherwise we will run into lock inversion
>> in the MM as soon as we enable the MMU notifier.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +++++++++++++++++++++-----
>>   1 file changed, 25 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 77c400675b79..e7ab0c1e2793 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -897,27 +897,42 @@ static int amdgpu_vm_alloc_pts(struct 
>> amdgpu_device *adev,
>>       struct amdgpu_vm_pt *entry = cursor->entry;
>>       struct amdgpu_bo_param bp;
>>       struct amdgpu_bo *pt;
>> +    bool need_entries;
>>       int r;
>>   -    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>> +    need_entries = cursor->level < AMDGPU_VM_PTB && !entry->entries;
>> +    if (!need_entries && entry->base.bo)
>> +        return 0;
>> +
>> +    /* We need to make sure that we don't allocate PDs/PTs while 
>> holding the
>> +     * eviction lock or we run into lock recursion in the MM.
>> +     */
>> +    amdgpu_vm_eviction_unlock(vm);
>> +
>> +    if (need_entries) {
>>           unsigned num_entries;
>>             num_entries = amdgpu_vm_num_entries(adev, cursor->level);
>>           entry->entries = kvmalloc_array(num_entries,
>>                           sizeof(*entry->entries),
>>                           GFP_KERNEL | __GFP_ZERO);
>> -        if (!entry->entries)
>> -            return -ENOMEM;
>> +        if (!entry->entries) {
>> +            r = -ENOMEM;
>> +            goto error_lock;
>> +        }
>>       }
>>   -    if (entry->base.bo)
>> -        return 0;
>> +    if (entry->base.bo) {
>> +        r = 0;
>> +        goto error_lock;
>> +    }
>>         amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
>>         r = amdgpu_bo_create(adev, &bp, &pt);
>> +    amdgpu_vm_eviction_lock(vm);
>>       if (r)
>> -        return r;
>> +        goto error_free_pt;
>>         /* Keep a reference to the root directory to avoid
>>        * freeing them up in the wrong order.
>> @@ -936,6 +951,10 @@ static int amdgpu_vm_alloc_pts(struct 
>> amdgpu_device *adev,
>>       amdgpu_bo_unref(&pt);
>>       entry->base.bo = NULL;
>>       return r;
>> +
>> +error_lock:
>> +    amdgpu_vm_eviction_lock(vm);
>> +    return r;
>>   }
>>     /**

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] drm/amdgpu: stop allocating PDs/PTs with the eviction lock held
  2020-02-27 15:28   ` Christian König
@ 2020-02-27 18:43     ` Felix Kuehling
  0 siblings, 0 replies; 4+ messages in thread
From: Felix Kuehling @ 2020-02-27 18:43 UTC (permalink / raw)
  To: christian.koenig, alex.sierra, amd-gfx

On 2020-02-27 9:28, Christian König wrote:
> Hi Felix,
>
> so coming back to this after two weeks of distraction.
>
> Am 14.02.20 um 22:12 schrieb Felix Kuehling:
>> Now you allow eviction of page tables while you allocate page tables. 
>> Isn't the whole point of the eviction lock to prevent page table 
>> evictions while manipulating page tables?
>>
>> Or does this only apply to PTE invalidations which never allocated 
>> memory? Is that the only case that doesn't reserve page tables?
>
> Yes, exactly. We essentially have to distinct two cases here:
>
> 1. We are validating PTEs and so eventually need to allocate page tables.
>     For this the root PD is reserved and we actually know that the VM 
> can't be evicted.
>     But we still need to hold the lock while pushing the actual 
> updates the hardware to make sure we don't mess up the data structures 
> with concurrent invalidations.
>
> 2. We are invalidating PTEs.
>     Here we might or might not have the root PD reserved and so need 
> to make sure that nobody is evicting page tables while we are 
> invalidating.
>
> Quite a complicated dance, but of hand I also don't see much other 
> choice.

Sounds good. Maybe we should have an assert that the root PD is reserved 
in amdgpu_vm_alloc_pts. With that modification the patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


>
> Regards,
> Christian.
>
>>
>> Regards,
>>   Felix
>>
>> On 2020-02-12 10:14 a.m., Christian König wrote:
>>> We need to make sure to not allocate PDs/PTs while holding
>>> the eviction lock or otherwise we will run into lock inversion
>>> in the MM as soon as we enable the MMU notifier.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 
>>> +++++++++++++++++++++-----
>>>   1 file changed, 25 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 77c400675b79..e7ab0c1e2793 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -897,27 +897,42 @@ static int amdgpu_vm_alloc_pts(struct 
>>> amdgpu_device *adev,
>>>       struct amdgpu_vm_pt *entry = cursor->entry;
>>>       struct amdgpu_bo_param bp;
>>>       struct amdgpu_bo *pt;
>>> +    bool need_entries;
>>>       int r;
>>>   -    if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
>>> +    need_entries = cursor->level < AMDGPU_VM_PTB && !entry->entries;
>>> +    if (!need_entries && entry->base.bo)
>>> +        return 0;
>>> +
>>> +    /* We need to make sure that we don't allocate PDs/PTs while 
>>> holding the
>>> +     * eviction lock or we run into lock recursion in the MM.
>>> +     */
>>> +    amdgpu_vm_eviction_unlock(vm);
>>> +
>>> +    if (need_entries) {
>>>           unsigned num_entries;
>>>             num_entries = amdgpu_vm_num_entries(adev, cursor->level);
>>>           entry->entries = kvmalloc_array(num_entries,
>>>                           sizeof(*entry->entries),
>>>                           GFP_KERNEL | __GFP_ZERO);
>>> -        if (!entry->entries)
>>> -            return -ENOMEM;
>>> +        if (!entry->entries) {
>>> +            r = -ENOMEM;
>>> +            goto error_lock;
>>> +        }
>>>       }
>>>   -    if (entry->base.bo)
>>> -        return 0;
>>> +    if (entry->base.bo) {
>>> +        r = 0;
>>> +        goto error_lock;
>>> +    }
>>>         amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
>>>         r = amdgpu_bo_create(adev, &bp, &pt);
>>> +    amdgpu_vm_eviction_lock(vm);
>>>       if (r)
>>> -        return r;
>>> +        goto error_free_pt;
>>>         /* Keep a reference to the root directory to avoid
>>>        * freeing them up in the wrong order.
>>> @@ -936,6 +951,10 @@ static int amdgpu_vm_alloc_pts(struct 
>>> amdgpu_device *adev,
>>>       amdgpu_bo_unref(&pt);
>>>       entry->base.bo = NULL;
>>>       return r;
>>> +
>>> +error_lock:
>>> +    amdgpu_vm_eviction_lock(vm);
>>> +    return r;
>>>   }
>>>     /**
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-02-27 18:43 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-12 15:14 [PATCH] drm/amdgpu: stop allocating PDs/PTs with the eviction lock held Christian König
2020-02-14 21:12 ` Felix Kuehling
2020-02-27 15:28   ` Christian König
2020-02-27 18:43     ` Felix Kuehling

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.