[PATCH 1/2] drm/amdgpu: free pasid early before converting a vm

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm
@ 2021-06-23 12:25 Nirmoy Das
  2021-06-23 12:25 ` [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm Nirmoy Das
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Nirmoy Das @ 2021-06-23 12:25 UTC (permalink / raw)
  To: amd-gfx; +Cc: Felix.Kuehling, Nirmoy Das, Christian.Koenig

VM code should not be responsible for freeing pasid as pasid
gets allocated outside of VM code, before initializing a vm.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           | 5 -----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f96598279593..42e22b1fdfee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1287,6 +1287,12 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
 	if (avm->process_info)
 		return -EINVAL;
 
+	/* Free the original amdgpu allocated pasid,
+	 * will be replaced with kfd allocated pasid
+	 */
+	if (avm->pasid)
+		amdgpu_pasid_free(avm->pasid);
+
 	/* Convert VM into a compute VM */
 	ret = amdgpu_vm_make_compute(adev, avm, pasid);
 	if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 63975bda8e76..be841d62a1d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3094,11 +3094,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
 		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-
-		/* Free the original amdgpu allocated pasid
-		 * Will be replaced with kfd allocated pasid
-		 */
-		amdgpu_pasid_free(vm->pasid);
 		vm->pasid = 0;
 	}
 
-- 
2.32.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm
  2021-06-23 12:25 [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm Nirmoy Das
@ 2021-06-23 12:25 ` Nirmoy Das
  2021-06-23 12:50   ` Christian König
  2021-06-23 12:46 ` [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm Christian König
  2021-06-23 19:05 ` Felix Kuehling
  2 siblings, 1 reply; 11+ messages in thread
From: Nirmoy Das @ 2021-06-23 12:25 UTC (permalink / raw)
  To: amd-gfx; +Cc: Felix.Kuehling, Nirmoy Das, Christian.Koenig

Replace idr with xarray as we actually need hash functionality.
Cleanup code related to vm pasid by adding helper function.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 134 +++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 +-
 2 files changed, 60 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index be841d62a1d4..e047e56a4be2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -87,6 +87,39 @@ struct amdgpu_prt_cb {
 	struct dma_fence_cb cb;
 };
 
+static int amdgpu_vm_set_pasid(struct amdgpu_device *adev,
+			       struct amdgpu_vm *vm,
+			       unsigned long pasid)
+{
+	unsigned long flags;
+	int r;
+
+	if (pasid) {
+		xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+		r = xa_err(__xa_store(&adev->vm_manager.pasids, pasid, vm,
+				      GFP_ATOMIC));
+		xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+		if (r < 0)
+			return r;
+	} else {
+		unsigned long index;
+		struct amdgpu_vm *res;
+
+		xa_lock_irqsave(&adev->vm_manager.pasids, flags);
+		xa_for_each(&adev->vm_manager.pasids, index, res) {
+			if (res == vm) {
+				__xa_erase(&adev->vm_manager.pasids, index);
+				break;
+			}
+		}
+		xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
+	}
+
+	vm->pasid = pasid;
+
+	return 0;
+}
+
 /*
  * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
  * happens while holding this lock anywhere to prevent deadlocks when
@@ -2940,18 +2973,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
 
 	amdgpu_bo_unreserve(vm->root.bo);
 
-	if (pasid) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
-			      GFP_ATOMIC);
-		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-		if (r < 0)
-			goto error_free_root;
-
-		vm->pasid = pasid;
-	}
+	r = amdgpu_vm_set_pasid(adev, vm, pasid);
+	if (r)
+		goto error_free_root;
 
 	INIT_KFIFO(vm->faults);
 
@@ -3039,18 +3063,11 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		goto unreserve_bo;
 
-	if (pasid) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
-			      GFP_ATOMIC);
-		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-
-		if (r == -ENOSPC)
-			goto unreserve_bo;
-		r = 0;
-	}
+	/* remove previous {pasid:vm} entry first */
+	r = amdgpu_vm_set_pasid(adev, vm, 0);
+	r = amdgpu_vm_set_pasid(adev, vm, pasid);
+	if (r)
+		goto unreserve_bo;
 
 	/* Check if PD needs to be reinitialized and do it before
 	 * changing any other state, in case it fails.
@@ -3061,7 +3078,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 				       to_amdgpu_bo_vm(vm->root.bo),
 				       false);
 		if (r)
-			goto free_idr;
+			goto free_pasid_entry;
 	}
 
 	/* Update VM state */
@@ -3078,7 +3095,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		r = amdgpu_bo_sync_wait(vm->root.bo,
 					AMDGPU_FENCE_OWNER_UNDEFINED, true);
 		if (r)
-			goto free_idr;
+			goto free_pasid_entry;
 
 		vm->update_funcs = &amdgpu_vm_cpu_funcs;
 	} else {
@@ -3088,31 +3105,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	vm->last_update = NULL;
 	vm->is_compute_context = true;
 
-	if (vm->pasid) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
-		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-		vm->pasid = 0;
-	}
-
 	/* Free the shadow bo for compute VM */
 	amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
 
-	if (pasid)
-		vm->pasid = pasid;
-
 	goto unreserve_bo;
 
-free_idr:
-	if (pasid) {
-		unsigned long flags;
+free_pasid_entry:
+	amdgpu_vm_set_pasid(adev, vm, 0);
 
-		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-		idr_remove(&adev->vm_manager.pasid_idr, pasid);
-		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-	}
 unreserve_bo:
 	amdgpu_bo_unreserve(vm->root.bo);
 	return r;
@@ -3128,14 +3128,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	if (vm->pasid) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
-		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-	}
-	vm->pasid = 0;
+	amdgpu_vm_set_pasid(adev, vm, 0);
 	vm->is_compute_context = false;
 }
 
@@ -3159,15 +3152,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	root = amdgpu_bo_ref(vm->root.bo);
 	amdgpu_bo_reserve(root, true);
-	if (vm->pasid) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
-		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
-		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
-		vm->pasid = 0;
-	}
-
+	amdgpu_vm_set_pasid(adev, vm, 0);
 	dma_fence_wait(vm->last_unlocked, false);
 	dma_fence_put(vm->last_unlocked);
 
@@ -3249,8 +3234,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	adev->vm_manager.vm_update_mode = 0;
 #endif
 
-	idr_init(&adev->vm_manager.pasid_idr);
-	spin_lock_init(&adev->vm_manager.pasid_lock);
+	xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
 }
 
 /**
@@ -3262,8 +3246,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
  */
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
-	WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
-	idr_destroy(&adev->vm_manager.pasid_idr);
+	WARN_ON(!xa_empty(&adev->vm_manager.pasids));
+	xa_destroy(&adev->vm_manager.pasids);
 
 	amdgpu_vmid_mgr_fini(adev);
 }
@@ -3332,13 +3316,13 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
 	struct amdgpu_vm *vm;
 	unsigned long flags;
 
-	spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
 
-	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	vm = xa_load(&adev->vm_manager.pasids, pasid);
 	if (vm)
 		*task_info = vm->task_info;
 
-	spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
 }
 
 /**
@@ -3380,15 +3364,15 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 	struct amdgpu_vm *vm;
 	int r;
 
-	spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
-	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+	vm = xa_load(&adev->vm_manager.pasids, pasid);
 	if (vm) {
 		root = amdgpu_bo_ref(vm->root.bo);
 		is_compute_context = vm->is_compute_context;
 	} else {
 		root = NULL;
 	}
-	spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
+	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
 
 	if (!root)
 		return false;
@@ -3406,11 +3390,11 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 		goto error_unref;
 
 	/* Double check that the VM still exists */
-	spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
-	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
+	vm = xa_load(&adev->vm_manager.pasids, pasid);
 	if (vm && vm->root.bo != root)
 		vm = NULL;
-	spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
+	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
 	if (!vm)
 		goto error_unlock;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ddb85a85cbba..31c467764162 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -359,8 +359,7 @@ struct amdgpu_vm_manager {
 	/* PASID to VM mapping, will be used in interrupt context to
 	 * look up VM of a page fault
 	 */
-	struct idr				pasid_idr;
-	spinlock_t				pasid_lock;
+	struct xarray				pasids;
 };
 
 struct amdgpu_bo_va_mapping;
-- 
2.32.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm
  2021-06-23 12:25 ` [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm Nirmoy Das
@ 2021-06-23 12:50   ` Christian König
  2021-06-23 13:11     ` Das, Nirmoy
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2021-06-23 12:50 UTC (permalink / raw)
  To: Nirmoy Das, amd-gfx; +Cc: Felix.Kuehling, Christian.Koenig



Am 23.06.21 um 14:25 schrieb Nirmoy Das:
> Replace idr with xarray as we actually need hash functionality.
> Cleanup code related to vm pasid by adding helper function.
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 134 +++++++++++--------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 +-
>   2 files changed, 60 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index be841d62a1d4..e047e56a4be2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -87,6 +87,39 @@ struct amdgpu_prt_cb {
>   	struct dma_fence_cb cb;
>   };
>   
> +static int amdgpu_vm_set_pasid(struct amdgpu_device *adev,
> +			       struct amdgpu_vm *vm,
> +			       unsigned long pasid)

Some kerneldoc please describing why we have that function.

> +{
> +	unsigned long flags;
> +	int r;
> +
> +	if (pasid) {

You should probably reorder the code so that the old pasid is first 
removed and then eventually the new one added.

> +		xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> +		r = xa_err(__xa_store(&adev->vm_manager.pasids, pasid, vm,
> +				      GFP_ATOMIC));
> +		xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);

As far as I can see this can just use xa_store() which also drops the 
need for GFP_ATOMIC here.

> +		if (r < 0)
> +			return r;
> +	} else {
> +		unsigned long index;
> +		struct amdgpu_vm *res;
> +
> +		xa_lock_irqsave(&adev->vm_manager.pasids, flags);
> +		xa_for_each(&adev->vm_manager.pasids, index, res) {
> +			if (res == vm) {
> +				__xa_erase(&adev->vm_manager.pasids, index);
> +				break;
> +			}
> +		}
> +		xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);

That is really ugly, why is that necessary?

Christian

> +	}
> +
> +	vm->pasid = pasid;
> +
> +	return 0;
> +}
> +
>   /*
>    * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
>    * happens while holding this lock anywhere to prevent deadlocks when
> @@ -2940,18 +2973,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
>   
>   	amdgpu_bo_unreserve(vm->root.bo);
>   
> -	if (pasid) {
> -		unsigned long flags;
> -
> -		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
> -		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
> -			      GFP_ATOMIC);
> -		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
> -		if (r < 0)
> -			goto error_free_root;
> -
> -		vm->pasid = pasid;
> -	}
> +	r = amdgpu_vm_set_pasid(adev, vm, pasid);
> +	if (r)
> +		goto error_free_root;
>   
>   	INIT_KFIFO(vm->faults);
>   
> @@ -3039,18 +3063,11 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	if (r)
>   		goto unreserve_bo;
>   
> -	if (pasid) {
> -		unsigned long flags;
> -
> -		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
> -		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
> -			      GFP_ATOMIC);
> -		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
> -
> -		if (r == -ENOSPC)
> -			goto unreserve_bo;
> -		r = 0;
> -	}
> +	/* remove previous {pasid:vm} entry first */
> +	r = amdgpu_vm_set_pasid(adev, vm, 0);
> +	r = amdgpu_vm_set_pasid(adev, vm, pasid);
> +	if (r)
> +		goto unreserve_bo;
>   
>   	/* Check if PD needs to be reinitialized and do it before
>   	 * changing any other state, in case it fails.
> @@ -3061,7 +3078,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   				       to_amdgpu_bo_vm(vm->root.bo),
>   				       false);
>   		if (r)
> -			goto free_idr;
> +			goto free_pasid_entry;
>   	}
>   
>   	/* Update VM state */
> @@ -3078,7 +3095,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		r = amdgpu_bo_sync_wait(vm->root.bo,
>   					AMDGPU_FENCE_OWNER_UNDEFINED, true);
>   		if (r)
> -			goto free_idr;
> +			goto free_pasid_entry;
>   
>   		vm->update_funcs = &amdgpu_vm_cpu_funcs;
>   	} else {
> @@ -3088,31 +3105,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   	vm->last_update = NULL;
>   	vm->is_compute_context = true;
>   
> -	if (vm->pasid) {
> -		unsigned long flags;
> -
> -		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
> -		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
> -		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
> -		vm->pasid = 0;
> -	}
> -
>   	/* Free the shadow bo for compute VM */
>   	amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
>   
> -	if (pasid)
> -		vm->pasid = pasid;
> -
>   	goto unreserve_bo;
>   
> -free_idr:
> -	if (pasid) {
> -		unsigned long flags;
> +free_pasid_entry:
> +	amdgpu_vm_set_pasid(adev, vm, 0);
>   
> -		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
> -		idr_remove(&adev->vm_manager.pasid_idr, pasid);
> -		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
> -	}
>   unreserve_bo:
>   	amdgpu_bo_unreserve(vm->root.bo);
>   	return r;
> @@ -3128,14 +3128,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>    */
>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   {
> -	if (vm->pasid) {
> -		unsigned long flags;
> -
> -		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
> -		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
> -		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
> -	}
> -	vm->pasid = 0;
> +	amdgpu_vm_set_pasid(adev, vm, 0);
>   	vm->is_compute_context = false;
>   }
>   
> @@ -3159,15 +3152,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>   
>   	root = amdgpu_bo_ref(vm->root.bo);
>   	amdgpu_bo_reserve(root, true);
> -	if (vm->pasid) {
> -		unsigned long flags;
> -
> -		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
> -		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
> -		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
> -		vm->pasid = 0;
> -	}
> -
> +	amdgpu_vm_set_pasid(adev, vm, 0);
>   	dma_fence_wait(vm->last_unlocked, false);
>   	dma_fence_put(vm->last_unlocked);
>   
> @@ -3249,8 +3234,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>   	adev->vm_manager.vm_update_mode = 0;
>   #endif
>   
> -	idr_init(&adev->vm_manager.pasid_idr);
> -	spin_lock_init(&adev->vm_manager.pasid_lock);
> +	xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
>   }
>   
>   /**
> @@ -3262,8 +3246,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
>    */
>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>   {
> -	WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
> -	idr_destroy(&adev->vm_manager.pasid_idr);
> +	WARN_ON(!xa_empty(&adev->vm_manager.pasids));
> +	xa_destroy(&adev->vm_manager.pasids);
>   
>   	amdgpu_vmid_mgr_fini(adev);
>   }
> @@ -3332,13 +3316,13 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
>   	struct amdgpu_vm *vm;
>   	unsigned long flags;
>   
> -	spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
> +	xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>   
> -	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
> +	vm = xa_load(&adev->vm_manager.pasids, pasid);
>   	if (vm)
>   		*task_info = vm->task_info;
>   
> -	spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
> +	xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>   }
>   
>   /**
> @@ -3380,15 +3364,15 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>   	struct amdgpu_vm *vm;
>   	int r;
>   
> -	spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
> -	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
> +	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> +	vm = xa_load(&adev->vm_manager.pasids, pasid);
>   	if (vm) {
>   		root = amdgpu_bo_ref(vm->root.bo);
>   		is_compute_context = vm->is_compute_context;
>   	} else {
>   		root = NULL;
>   	}
> -	spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
> +	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>   
>   	if (!root)
>   		return false;
> @@ -3406,11 +3390,11 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>   		goto error_unref;
>   
>   	/* Double check that the VM still exists */
> -	spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
> -	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
> +	xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> +	vm = xa_load(&adev->vm_manager.pasids, pasid);
>   	if (vm && vm->root.bo != root)
>   		vm = NULL;
> -	spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
> +	xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>   	if (!vm)
>   		goto error_unlock;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index ddb85a85cbba..31c467764162 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -359,8 +359,7 @@ struct amdgpu_vm_manager {
>   	/* PASID to VM mapping, will be used in interrupt context to
>   	 * look up VM of a page fault
>   	 */
> -	struct idr				pasid_idr;
> -	spinlock_t				pasid_lock;
> +	struct xarray				pasids;
>   };
>   
>   struct amdgpu_bo_va_mapping;

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm
  2021-06-23 12:50   ` Christian König
@ 2021-06-23 13:11     ` Das, Nirmoy
  2021-06-23 13:40       ` Christian König
  0 siblings, 1 reply; 11+ messages in thread
From: Das, Nirmoy @ 2021-06-23 13:11 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: Felix.Kuehling, Christian.Koenig


On 6/23/2021 2:50 PM, Christian König wrote:
>
>
> Am 23.06.21 um 14:25 schrieb Nirmoy Das:
>> Replace idr with xarray as we actually need hash functionality.
>> Cleanup code related to vm pasid by adding helper function.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 134 +++++++++++--------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 +-
>>   2 files changed, 60 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index be841d62a1d4..e047e56a4be2 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -87,6 +87,39 @@ struct amdgpu_prt_cb {
>>       struct dma_fence_cb cb;
>>   };
>>   +static int amdgpu_vm_set_pasid(struct amdgpu_device *adev,
>> +                   struct amdgpu_vm *vm,
>> +                   unsigned long pasid)
>
> Some kerneldoc please describing why we have that function.


Alright.


>
>> +{
>> +    unsigned long flags;
>> +    int r;
>> +
>> +    if (pasid) {
>
> You should probably reorder the code so that the old pasid is first 
> removed and then eventually the new one added.
>
>> + xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>> +        r = xa_err(__xa_store(&adev->vm_manager.pasids, pasid, vm,
>> +                      GFP_ATOMIC));
>> +        xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>
> As far as I can see this can just use xa_store() which also drops the 
> need for GFP_ATOMIC here.


Do we need to have this irqsave/restore to keep passids safe for 
amdgpu_vm_handle_fault() ?

xa_store() takes the spinlock without irq flags so I wanted to keep old 
behavior.


>
>> +        if (r < 0)
>> +            return r;
>> +    } else {
>> +        unsigned long index;
>> +        struct amdgpu_vm *res;
>> +
>> +        xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>> +        xa_for_each(&adev->vm_manager.pasids, index, res) {
>> +            if (res == vm) {
>> +                __xa_erase(&adev->vm_manager.pasids, index);
>> +                break;
>> +            }
>> +        }
>> +        xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>
> That is really ugly, why is that necessary?

Do you mean the lock around xa_for_each() ? I think I can just used lock 
around __xa_erase() or just xa_erase() if just simple spinlock without 
flags is enough.


Regards,

Nirmoy


>
> Christian
>
>> +    }
>> +
>> +    vm->pasid = pasid;
>> +
>> +    return 0;
>> +}
>> +
>>   /*
>>    * vm eviction_lock can be taken in MMU notifiers. Make sure no 
>> reclaim-FS
>>    * happens while holding this lock anywhere to prevent deadlocks when
>> @@ -2940,18 +2973,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, 
>> struct amdgpu_vm *vm, u32 pasid)
>>         amdgpu_bo_unreserve(vm->root.bo);
>>   -    if (pasid) {
>> -        unsigned long flags;
>> -
>> -        spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid 
>> + 1,
>> -                  GFP_ATOMIC);
>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>> -        if (r < 0)
>> -            goto error_free_root;
>> -
>> -        vm->pasid = pasid;
>> -    }
>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>> +    if (r)
>> +        goto error_free_root;
>>         INIT_KFIFO(vm->faults);
>>   @@ -3039,18 +3063,11 @@ int amdgpu_vm_make_compute(struct 
>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>       if (r)
>>           goto unreserve_bo;
>>   -    if (pasid) {
>> -        unsigned long flags;
>> -
>> -        spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid 
>> + 1,
>> -                  GFP_ATOMIC);
>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>> -
>> -        if (r == -ENOSPC)
>> -            goto unreserve_bo;
>> -        r = 0;
>> -    }
>> +    /* remove previous {pasid:vm} entry first */
>> +    r = amdgpu_vm_set_pasid(adev, vm, 0);
>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>> +    if (r)
>> +        goto unreserve_bo;
>>         /* Check if PD needs to be reinitialized and do it before
>>        * changing any other state, in case it fails.
>> @@ -3061,7 +3078,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device 
>> *adev, struct amdgpu_vm *vm,
>>                          to_amdgpu_bo_vm(vm->root.bo),
>>                          false);
>>           if (r)
>> -            goto free_idr;
>> +            goto free_pasid_entry;
>>       }
>>         /* Update VM state */
>> @@ -3078,7 +3095,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device 
>> *adev, struct amdgpu_vm *vm,
>>           r = amdgpu_bo_sync_wait(vm->root.bo,
>>                       AMDGPU_FENCE_OWNER_UNDEFINED, true);
>>           if (r)
>> -            goto free_idr;
>> +            goto free_pasid_entry;
>>             vm->update_funcs = &amdgpu_vm_cpu_funcs;
>>       } else {
>> @@ -3088,31 +3105,14 @@ int amdgpu_vm_make_compute(struct 
>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>       vm->last_update = NULL;
>>       vm->is_compute_context = true;
>>   -    if (vm->pasid) {
>> -        unsigned long flags;
>> -
>> -        spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>> -        vm->pasid = 0;
>> -    }
>> -
>>       /* Free the shadow bo for compute VM */
>> amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
>>   -    if (pasid)
>> -        vm->pasid = pasid;
>> -
>>       goto unreserve_bo;
>>   -free_idr:
>> -    if (pasid) {
>> -        unsigned long flags;
>> +free_pasid_entry:
>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>> -        idr_remove(&adev->vm_manager.pasid_idr, pasid);
>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>> -    }
>>   unreserve_bo:
>>       amdgpu_bo_unreserve(vm->root.bo);
>>       return r;
>> @@ -3128,14 +3128,7 @@ int amdgpu_vm_make_compute(struct 
>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>    */
>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct 
>> amdgpu_vm *vm)
>>   {
>> -    if (vm->pasid) {
>> -        unsigned long flags;
>> -
>> -        spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>> -    }
>> -    vm->pasid = 0;
>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>       vm->is_compute_context = false;
>>   }
>>   @@ -3159,15 +3152,7 @@ void amdgpu_vm_fini(struct amdgpu_device 
>> *adev, struct amdgpu_vm *vm)
>>         root = amdgpu_bo_ref(vm->root.bo);
>>       amdgpu_bo_reserve(root, true);
>> -    if (vm->pasid) {
>> -        unsigned long flags;
>> -
>> -        spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>> -        vm->pasid = 0;
>> -    }
>> -
>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>       dma_fence_wait(vm->last_unlocked, false);
>>       dma_fence_put(vm->last_unlocked);
>>   @@ -3249,8 +3234,7 @@ void amdgpu_vm_manager_init(struct 
>> amdgpu_device *adev)
>>       adev->vm_manager.vm_update_mode = 0;
>>   #endif
>>   -    idr_init(&adev->vm_manager.pasid_idr);
>> -    spin_lock_init(&adev->vm_manager.pasid_lock);
>> +    xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
>>   }
>>     /**
>> @@ -3262,8 +3246,8 @@ void amdgpu_vm_manager_init(struct 
>> amdgpu_device *adev)
>>    */
>>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>>   {
>> -    WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
>> -    idr_destroy(&adev->vm_manager.pasid_idr);
>> +    WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>> +    xa_destroy(&adev->vm_manager.pasids);
>>         amdgpu_vmid_mgr_fini(adev);
>>   }
>> @@ -3332,13 +3316,13 @@ void amdgpu_vm_get_task_info(struct 
>> amdgpu_device *adev, u32 pasid,
>>       struct amdgpu_vm *vm;
>>       unsigned long flags;
>>   -    spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>> +    xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>   -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>       if (vm)
>>           *task_info = vm->task_info;
>>   - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>   }
>>     /**
>> @@ -3380,15 +3364,15 @@ bool amdgpu_vm_handle_fault(struct 
>> amdgpu_device *adev, u32 pasid,
>>       struct amdgpu_vm *vm;
>>       int r;
>>   -    spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>       if (vm) {
>>           root = amdgpu_bo_ref(vm->root.bo);
>>           is_compute_context = vm->is_compute_context;
>>       } else {
>>           root = NULL;
>>       }
>> -    spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>         if (!root)
>>           return false;
>> @@ -3406,11 +3390,11 @@ bool amdgpu_vm_handle_fault(struct 
>> amdgpu_device *adev, u32 pasid,
>>           goto error_unref;
>>         /* Double check that the VM still exists */
>> -    spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>       if (vm && vm->root.bo != root)
>>           vm = NULL;
>> -    spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>       if (!vm)
>>           goto error_unlock;
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index ddb85a85cbba..31c467764162 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -359,8 +359,7 @@ struct amdgpu_vm_manager {
>>       /* PASID to VM mapping, will be used in interrupt context to
>>        * look up VM of a page fault
>>        */
>> -    struct idr                pasid_idr;
>> -    spinlock_t                pasid_lock;
>> +    struct xarray                pasids;
>>   };
>>     struct amdgpu_bo_va_mapping;
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm
  2021-06-23 13:11     ` Das, Nirmoy
@ 2021-06-23 13:40       ` Christian König
  2021-06-23 14:54         ` Das, Nirmoy
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2021-06-23 13:40 UTC (permalink / raw)
  To: Das, Nirmoy, Christian König, amd-gfx; +Cc: Felix.Kuehling



Am 23.06.21 um 15:11 schrieb Das, Nirmoy:
>
> On 6/23/2021 2:50 PM, Christian König wrote:
>>
>>
>> Am 23.06.21 um 14:25 schrieb Nirmoy Das:
>>> Replace idr with xarray as we actually need hash functionality.
>>> Cleanup code related to vm pasid by adding helper function.
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 134 
>>> +++++++++++--------------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 +-
>>>   2 files changed, 60 insertions(+), 77 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index be841d62a1d4..e047e56a4be2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -87,6 +87,39 @@ struct amdgpu_prt_cb {
>>>       struct dma_fence_cb cb;
>>>   };
>>>   +static int amdgpu_vm_set_pasid(struct amdgpu_device *adev,
>>> +                   struct amdgpu_vm *vm,
>>> +                   unsigned long pasid)
>>
>> Some kerneldoc please describing why we have that function.
>
>
> Alright.
>
>
>>
>>> +{
>>> +    unsigned long flags;
>>> +    int r;
>>> +
>>> +    if (pasid) {
>>
>> You should probably reorder the code so that the old pasid is first 
>> removed and then eventually the new one added.
>>
>>> + xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>> +        r = xa_err(__xa_store(&adev->vm_manager.pasids, pasid, vm,
>>> +                      GFP_ATOMIC));
>>> +        xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>
>> As far as I can see this can just use xa_store() which also drops the 
>> need for GFP_ATOMIC here.
>
>
> Do we need to have this irqsave/restore to keep passids safe for 
> amdgpu_vm_handle_fault() ?

No, we need the VM safe not the pasid.

> xa_store() takes the spinlock without irq flags so I wanted to keep 
> old behavior.

Yeah, that's indeed problematic. You need to keep that straight or 
lockdep will complain.

IIRC there is also a function to reserve an entry before you take the 
lock. Maybe use that one.

>
>
>>
>>> +        if (r < 0)
>>> +            return r;
>>> +    } else {
>>> +        unsigned long index;
>>> +        struct amdgpu_vm *res;
>>> +
>>> +        xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>> +        xa_for_each(&adev->vm_manager.pasids, index, res) {
>>> +            if (res == vm) {
>>> +                __xa_erase(&adev->vm_manager.pasids, index);
>>> +                break;
>>> +            }
>>> +        }
>>> +        xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>
>> That is really ugly, why is that necessary?
>
> Do you mean the lock around xa_for_each() ? I think I can just used 
> lock around __xa_erase() or just xa_erase() if just simple spinlock 
> without flags is enough.

I mean why you use xa_for_each() here?

Just __xa_erase should be enough.

Christian.

>
>
> Regards,
>
> Nirmoy
>
>
>>
>> Christian
>>
>>> +    }
>>> +
>>> +    vm->pasid = pasid;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   /*
>>>    * vm eviction_lock can be taken in MMU notifiers. Make sure no 
>>> reclaim-FS
>>>    * happens while holding this lock anywhere to prevent deadlocks when
>>> @@ -2940,18 +2973,9 @@ int amdgpu_vm_init(struct amdgpu_device 
>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>         amdgpu_bo_unreserve(vm->root.bo);
>>>   -    if (pasid) {
>>> -        unsigned long flags;
>>> -
>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid 
>>> + 1,
>>> -                  GFP_ATOMIC);
>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>> -        if (r < 0)
>>> -            goto error_free_root;
>>> -
>>> -        vm->pasid = pasid;
>>> -    }
>>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>>> +    if (r)
>>> +        goto error_free_root;
>>>         INIT_KFIFO(vm->faults);
>>>   @@ -3039,18 +3063,11 @@ int amdgpu_vm_make_compute(struct 
>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>       if (r)
>>>           goto unreserve_bo;
>>>   -    if (pasid) {
>>> -        unsigned long flags;
>>> -
>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid 
>>> + 1,
>>> -                  GFP_ATOMIC);
>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>> -
>>> -        if (r == -ENOSPC)
>>> -            goto unreserve_bo;
>>> -        r = 0;
>>> -    }
>>> +    /* remove previous {pasid:vm} entry first */
>>> +    r = amdgpu_vm_set_pasid(adev, vm, 0);
>>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>>> +    if (r)
>>> +        goto unreserve_bo;
>>>         /* Check if PD needs to be reinitialized and do it before
>>>        * changing any other state, in case it fails.
>>> @@ -3061,7 +3078,7 @@ int amdgpu_vm_make_compute(struct 
>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>                          to_amdgpu_bo_vm(vm->root.bo),
>>>                          false);
>>>           if (r)
>>> -            goto free_idr;
>>> +            goto free_pasid_entry;
>>>       }
>>>         /* Update VM state */
>>> @@ -3078,7 +3095,7 @@ int amdgpu_vm_make_compute(struct 
>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>           r = amdgpu_bo_sync_wait(vm->root.bo,
>>>                       AMDGPU_FENCE_OWNER_UNDEFINED, true);
>>>           if (r)
>>> -            goto free_idr;
>>> +            goto free_pasid_entry;
>>>             vm->update_funcs = &amdgpu_vm_cpu_funcs;
>>>       } else {
>>> @@ -3088,31 +3105,14 @@ int amdgpu_vm_make_compute(struct 
>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>       vm->last_update = NULL;
>>>       vm->is_compute_context = true;
>>>   -    if (vm->pasid) {
>>> -        unsigned long flags;
>>> -
>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>> -        vm->pasid = 0;
>>> -    }
>>> -
>>>       /* Free the shadow bo for compute VM */
>>> amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
>>>   -    if (pasid)
>>> -        vm->pasid = pasid;
>>> -
>>>       goto unreserve_bo;
>>>   -free_idr:
>>> -    if (pasid) {
>>> -        unsigned long flags;
>>> +free_pasid_entry:
>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>> -        idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>> -    }
>>>   unreserve_bo:
>>>       amdgpu_bo_unreserve(vm->root.bo);
>>>       return r;
>>> @@ -3128,14 +3128,7 @@ int amdgpu_vm_make_compute(struct 
>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>    */
>>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct 
>>> amdgpu_vm *vm)
>>>   {
>>> -    if (vm->pasid) {
>>> -        unsigned long flags;
>>> -
>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>> -    }
>>> -    vm->pasid = 0;
>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>       vm->is_compute_context = false;
>>>   }
>>>   @@ -3159,15 +3152,7 @@ void amdgpu_vm_fini(struct amdgpu_device 
>>> *adev, struct amdgpu_vm *vm)
>>>         root = amdgpu_bo_ref(vm->root.bo);
>>>       amdgpu_bo_reserve(root, true);
>>> -    if (vm->pasid) {
>>> -        unsigned long flags;
>>> -
>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>> -        vm->pasid = 0;
>>> -    }
>>> -
>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>       dma_fence_wait(vm->last_unlocked, false);
>>>       dma_fence_put(vm->last_unlocked);
>>>   @@ -3249,8 +3234,7 @@ void amdgpu_vm_manager_init(struct 
>>> amdgpu_device *adev)
>>>       adev->vm_manager.vm_update_mode = 0;
>>>   #endif
>>>   -    idr_init(&adev->vm_manager.pasid_idr);
>>> -    spin_lock_init(&adev->vm_manager.pasid_lock);
>>> +    xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
>>>   }
>>>     /**
>>> @@ -3262,8 +3246,8 @@ void amdgpu_vm_manager_init(struct 
>>> amdgpu_device *adev)
>>>    */
>>>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>>>   {
>>> - WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
>>> -    idr_destroy(&adev->vm_manager.pasid_idr);
>>> +    WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>>> +    xa_destroy(&adev->vm_manager.pasids);
>>>         amdgpu_vmid_mgr_fini(adev);
>>>   }
>>> @@ -3332,13 +3316,13 @@ void amdgpu_vm_get_task_info(struct 
>>> amdgpu_device *adev, u32 pasid,
>>>       struct amdgpu_vm *vm;
>>>       unsigned long flags;
>>>   -    spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>   -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>       if (vm)
>>>           *task_info = vm->task_info;
>>>   - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>   }
>>>     /**
>>> @@ -3380,15 +3364,15 @@ bool amdgpu_vm_handle_fault(struct 
>>> amdgpu_device *adev, u32 pasid,
>>>       struct amdgpu_vm *vm;
>>>       int r;
>>>   -    spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>       if (vm) {
>>>           root = amdgpu_bo_ref(vm->root.bo);
>>>           is_compute_context = vm->is_compute_context;
>>>       } else {
>>>           root = NULL;
>>>       }
>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>>         if (!root)
>>>           return false;
>>> @@ -3406,11 +3390,11 @@ bool amdgpu_vm_handle_fault(struct 
>>> amdgpu_device *adev, u32 pasid,
>>>           goto error_unref;
>>>         /* Double check that the VM still exists */
>>> -    spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>       if (vm && vm->root.bo != root)
>>>           vm = NULL;
>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>>       if (!vm)
>>>           goto error_unlock;
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index ddb85a85cbba..31c467764162 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -359,8 +359,7 @@ struct amdgpu_vm_manager {
>>>       /* PASID to VM mapping, will be used in interrupt context to
>>>        * look up VM of a page fault
>>>        */
>>> -    struct idr                pasid_idr;
>>> -    spinlock_t                pasid_lock;
>>> +    struct xarray                pasids;
>>>   };
>>>     struct amdgpu_bo_va_mapping;
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm
  2021-06-23 13:40       ` Christian König
@ 2021-06-23 14:54         ` Das, Nirmoy
  2021-06-23 15:02           ` Christian König
  0 siblings, 1 reply; 11+ messages in thread
From: Das, Nirmoy @ 2021-06-23 14:54 UTC (permalink / raw)
  To: Christian König, Christian König, amd-gfx; +Cc: Felix.Kuehling


On 6/23/2021 3:40 PM, Christian König wrote:
>
>
> Am 23.06.21 um 15:11 schrieb Das, Nirmoy:
>>
>> On 6/23/2021 2:50 PM, Christian König wrote:
>>>
>>>
>>> Am 23.06.21 um 14:25 schrieb Nirmoy Das:
>>>> Replace idr with xarray as we actually need hash functionality.
>>>> Cleanup code related to vm pasid by adding helper function.
>>>>
>>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 134 
>>>> +++++++++++--------------
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 +-
>>>>   2 files changed, 60 insertions(+), 77 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index be841d62a1d4..e047e56a4be2 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -87,6 +87,39 @@ struct amdgpu_prt_cb {
>>>>       struct dma_fence_cb cb;
>>>>   };
>>>>   +static int amdgpu_vm_set_pasid(struct amdgpu_device *adev,
>>>> +                   struct amdgpu_vm *vm,
>>>> +                   unsigned long pasid)
>>>
>>> Some kerneldoc please describing why we have that function.
>>
>>
>> Alright.
>>
>>
>>>
>>>> +{
>>>> +    unsigned long flags;
>>>> +    int r;
>>>> +
>>>> +    if (pasid) {
>>>
>>> You should probably reorder the code so that the old pasid is first 
>>> removed and then eventually the new one added.
>>>
>>>> + xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>> +        r = xa_err(__xa_store(&adev->vm_manager.pasids, pasid, vm,
>>>> +                      GFP_ATOMIC));
>>>> + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>
>>> As far as I can see this can just use xa_store() which also drops 
>>> the need for GFP_ATOMIC here.
>>
>>
>> Do we need to have this irqsave/restore to keep passids safe for 
>> amdgpu_vm_handle_fault() ?
>
> No, we need the VM safe not the pasid.


Would spin_lock_irq be enough to keep the VM safe then I can use 
xa_store_irq() and remove some extra locking code?


>
>> xa_store() takes the spinlock without irq flags so I wanted to keep 
>> old behavior.
>
> Yeah, that's indeed problematic. You need to keep that straight or 
> lockdep will complain.
>
> IIRC there is also a function to reserve an entry before you take the 
> lock. Maybe use that one.


xa_reserve() also takes a spin lock so I think this won't work either 
with gfp_kernel flag.


>
>>
>>
>>>
>>>> +        if (r < 0)
>>>> +            return r;
>>>> +    } else {
>>>> +        unsigned long index;
>>>> +        struct amdgpu_vm *res;
>>>> +
>>>> +        xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>> +        xa_for_each(&adev->vm_manager.pasids, index, res) {
>>>> +            if (res == vm) {
>>>> +                __xa_erase(&adev->vm_manager.pasids, index);
>>>> +                break;
>>>> +            }
>>>> +        }
>>>> + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>
>>> That is really ugly, why is that necessary?
>>
>> Do you mean the lock around xa_for_each() ? I think I can just used 
>> lock around __xa_erase() or just xa_erase() if just simple spinlock 
>> without flags is enough.
>
> I mean why you use xa_for_each() here?


amdgpu_vm_set_pasid() removes pasid:vm entry when pasid 0 is passed. I 
need xa_for_each()  to find the index of that vm pointer

so that I can pass that to __xa_erase(). I couldn't find an API which 
removes an entry  by value.


Regards,

Nirmoy

>
> Just __xa_erase should be enough.
>
> Christian.
>
>>
>>
>> Regards,
>>
>> Nirmoy
>>
>>
>>>
>>> Christian
>>>
>>>> +    }
>>>> +
>>>> +    vm->pasid = pasid;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>   /*
>>>>    * vm eviction_lock can be taken in MMU notifiers. Make sure no 
>>>> reclaim-FS
>>>>    * happens while holding this lock anywhere to prevent deadlocks 
>>>> when
>>>> @@ -2940,18 +2973,9 @@ int amdgpu_vm_init(struct amdgpu_device 
>>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>>         amdgpu_bo_unreserve(vm->root.bo);
>>>>   -    if (pasid) {
>>>> -        unsigned long flags;
>>>> -
>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>> pasid + 1,
>>>> -                  GFP_ATOMIC);
>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>> -        if (r < 0)
>>>> -            goto error_free_root;
>>>> -
>>>> -        vm->pasid = pasid;
>>>> -    }
>>>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>>>> +    if (r)
>>>> +        goto error_free_root;
>>>>         INIT_KFIFO(vm->faults);
>>>>   @@ -3039,18 +3063,11 @@ int amdgpu_vm_make_compute(struct 
>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>       if (r)
>>>>           goto unreserve_bo;
>>>>   -    if (pasid) {
>>>> -        unsigned long flags;
>>>> -
>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>> pasid + 1,
>>>> -                  GFP_ATOMIC);
>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>> -
>>>> -        if (r == -ENOSPC)
>>>> -            goto unreserve_bo;
>>>> -        r = 0;
>>>> -    }
>>>> +    /* remove previous {pasid:vm} entry first */
>>>> +    r = amdgpu_vm_set_pasid(adev, vm, 0);
>>>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>>>> +    if (r)
>>>> +        goto unreserve_bo;
>>>>         /* Check if PD needs to be reinitialized and do it before
>>>>        * changing any other state, in case it fails.
>>>> @@ -3061,7 +3078,7 @@ int amdgpu_vm_make_compute(struct 
>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>                          to_amdgpu_bo_vm(vm->root.bo),
>>>>                          false);
>>>>           if (r)
>>>> -            goto free_idr;
>>>> +            goto free_pasid_entry;
>>>>       }
>>>>         /* Update VM state */
>>>> @@ -3078,7 +3095,7 @@ int amdgpu_vm_make_compute(struct 
>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>           r = amdgpu_bo_sync_wait(vm->root.bo,
>>>>                       AMDGPU_FENCE_OWNER_UNDEFINED, true);
>>>>           if (r)
>>>> -            goto free_idr;
>>>> +            goto free_pasid_entry;
>>>>             vm->update_funcs = &amdgpu_vm_cpu_funcs;
>>>>       } else {
>>>> @@ -3088,31 +3105,14 @@ int amdgpu_vm_make_compute(struct 
>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>       vm->last_update = NULL;
>>>>       vm->is_compute_context = true;
>>>>   -    if (vm->pasid) {
>>>> -        unsigned long flags;
>>>> -
>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>> -        vm->pasid = 0;
>>>> -    }
>>>> -
>>>>       /* Free the shadow bo for compute VM */
>>>> amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
>>>>   -    if (pasid)
>>>> -        vm->pasid = pasid;
>>>> -
>>>>       goto unreserve_bo;
>>>>   -free_idr:
>>>> -    if (pasid) {
>>>> -        unsigned long flags;
>>>> +free_pasid_entry:
>>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>> -        idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>> -    }
>>>>   unreserve_bo:
>>>>       amdgpu_bo_unreserve(vm->root.bo);
>>>>       return r;
>>>> @@ -3128,14 +3128,7 @@ int amdgpu_vm_make_compute(struct 
>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>    */
>>>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct 
>>>> amdgpu_vm *vm)
>>>>   {
>>>> -    if (vm->pasid) {
>>>> -        unsigned long flags;
>>>> -
>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>> -    }
>>>> -    vm->pasid = 0;
>>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>>       vm->is_compute_context = false;
>>>>   }
>>>>   @@ -3159,15 +3152,7 @@ void amdgpu_vm_fini(struct amdgpu_device 
>>>> *adev, struct amdgpu_vm *vm)
>>>>         root = amdgpu_bo_ref(vm->root.bo);
>>>>       amdgpu_bo_reserve(root, true);
>>>> -    if (vm->pasid) {
>>>> -        unsigned long flags;
>>>> -
>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>> -        vm->pasid = 0;
>>>> -    }
>>>> -
>>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>>       dma_fence_wait(vm->last_unlocked, false);
>>>>       dma_fence_put(vm->last_unlocked);
>>>>   @@ -3249,8 +3234,7 @@ void amdgpu_vm_manager_init(struct 
>>>> amdgpu_device *adev)
>>>>       adev->vm_manager.vm_update_mode = 0;
>>>>   #endif
>>>>   -    idr_init(&adev->vm_manager.pasid_idr);
>>>> -    spin_lock_init(&adev->vm_manager.pasid_lock);
>>>> +    xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
>>>>   }
>>>>     /**
>>>> @@ -3262,8 +3246,8 @@ void amdgpu_vm_manager_init(struct 
>>>> amdgpu_device *adev)
>>>>    */
>>>>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>>>>   {
>>>> - WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
>>>> -    idr_destroy(&adev->vm_manager.pasid_idr);
>>>> +    WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>>>> +    xa_destroy(&adev->vm_manager.pasids);
>>>>         amdgpu_vmid_mgr_fini(adev);
>>>>   }
>>>> @@ -3332,13 +3316,13 @@ void amdgpu_vm_get_task_info(struct 
>>>> amdgpu_device *adev, u32 pasid,
>>>>       struct amdgpu_vm *vm;
>>>>       unsigned long flags;
>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>   -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>       if (vm)
>>>>           *task_info = vm->task_info;
>>>>   - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>>   }
>>>>     /**
>>>> @@ -3380,15 +3364,15 @@ bool amdgpu_vm_handle_fault(struct 
>>>> amdgpu_device *adev, u32 pasid,
>>>>       struct amdgpu_vm *vm;
>>>>       int r;
>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>       if (vm) {
>>>>           root = amdgpu_bo_ref(vm->root.bo);
>>>>           is_compute_context = vm->is_compute_context;
>>>>       } else {
>>>>           root = NULL;
>>>>       }
>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>>>         if (!root)
>>>>           return false;
>>>> @@ -3406,11 +3390,11 @@ bool amdgpu_vm_handle_fault(struct 
>>>> amdgpu_device *adev, u32 pasid,
>>>>           goto error_unref;
>>>>         /* Double check that the VM still exists */
>>>> -    spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>       if (vm && vm->root.bo != root)
>>>>           vm = NULL;
>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>>>       if (!vm)
>>>>           goto error_unlock;
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> index ddb85a85cbba..31c467764162 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>> @@ -359,8 +359,7 @@ struct amdgpu_vm_manager {
>>>>       /* PASID to VM mapping, will be used in interrupt context to
>>>>        * look up VM of a page fault
>>>>        */
>>>> -    struct idr                pasid_idr;
>>>> -    spinlock_t                pasid_lock;
>>>> +    struct xarray                pasids;
>>>>   };
>>>>     struct amdgpu_bo_va_mapping;
>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm
  2021-06-23 14:54         ` Das, Nirmoy
@ 2021-06-23 15:02           ` Christian König
  2021-06-23 15:24             ` Das, Nirmoy
  0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2021-06-23 15:02 UTC (permalink / raw)
  To: Das, Nirmoy, Christian König, amd-gfx; +Cc: Felix.Kuehling



Am 23.06.21 um 16:54 schrieb Das, Nirmoy:
>
> On 6/23/2021 3:40 PM, Christian König wrote:
>>
>>
>> Am 23.06.21 um 15:11 schrieb Das, Nirmoy:
>>>
>>> On 6/23/2021 2:50 PM, Christian König wrote:
>>>>
>>>>
>>>> Am 23.06.21 um 14:25 schrieb Nirmoy Das:
>>>>> Replace idr with xarray as we actually need hash functionality.
>>>>> Cleanup code related to vm pasid by adding helper function.
>>>>>
>>>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>>>> ---
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 134 
>>>>> +++++++++++--------------
>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 +-
>>>>>   2 files changed, 60 insertions(+), 77 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index be841d62a1d4..e047e56a4be2 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -87,6 +87,39 @@ struct amdgpu_prt_cb {
>>>>>       struct dma_fence_cb cb;
>>>>>   };
>>>>>   +static int amdgpu_vm_set_pasid(struct amdgpu_device *adev,
>>>>> +                   struct amdgpu_vm *vm,
>>>>> +                   unsigned long pasid)
>>>>
>>>> Some kerneldoc please describing why we have that function.
>>>
>>>
>>> Alright.
>>>
>>>
>>>>
>>>>> +{
>>>>> +    unsigned long flags;
>>>>> +    int r;
>>>>> +
>>>>> +    if (pasid) {
>>>>
>>>> You should probably reorder the code so that the old pasid is first 
>>>> removed and then eventually the new one added.
>>>>
>>>>> + xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>> +        r = xa_err(__xa_store(&adev->vm_manager.pasids, pasid, vm,
>>>>> +                      GFP_ATOMIC));
>>>>> + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>>
>>>> As far as I can see this can just use xa_store() which also drops 
>>>> the need for GFP_ATOMIC here.
>>>
>>>
>>> Do we need to have this irqsave/restore to keep passids safe for 
>>> amdgpu_vm_handle_fault() ?
>>
>> No, we need the VM safe not the pasid.
>
>
> Would spin_lock_irq be enough to keep the VM safe then I can use 
> xa_store_irq() and remove some extra locking code?

Yes, when when amdgpu_vm_set_pasid() is called we can be 100% sure that 
the VM won't be freed inside the function or otherwise I really question 
the saneness of the code.

>>
>>> xa_store() takes the spinlock without irq flags so I wanted to keep 
>>> old behavior.
>>
>> Yeah, that's indeed problematic. You need to keep that straight or 
>> lockdep will complain.
>>
>> IIRC there is also a function to reserve an entry before you take the 
>> lock. Maybe use that one.
>
>
> xa_reserve() also takes a spin lock so I think this won't work either 
> with gfp_kernel flag.

No, I just double checked. You can use xa_store_irq() with GFP_KERNEL.

The implementation makes sure to not allocate while holding the lock.

>
>
>>
>>>
>>>
>>>>
>>>>> +        if (r < 0)
>>>>> +            return r;
>>>>> +    } else {
>>>>> +        unsigned long index;
>>>>> +        struct amdgpu_vm *res;
>>>>> +
>>>>> +        xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>> +        xa_for_each(&adev->vm_manager.pasids, index, res) {
>>>>> +            if (res == vm) {
>>>>> + __xa_erase(&adev->vm_manager.pasids, index);
>>>>> +                break;
>>>>> +            }
>>>>> +        }
>>>>> + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>>
>>>> That is really ugly, why is that necessary?
>>>
>>> Do you mean the lock around xa_for_each() ? I think I can just used 
>>> lock around __xa_erase() or just xa_erase() if just simple spinlock 
>>> without flags is enough.
>>
>> I mean why you use xa_for_each() here?
>
>
> amdgpu_vm_set_pasid() removes pasid:vm entry when pasid 0 is passed. I 
> need xa_for_each()  to find the index of that vm pointer
>
> so that I can pass that to __xa_erase(). I couldn't find an API which 
> removes an entry  by value.

Ok sounds like you don't understand what semantics I suggest. Let me try 
once more:

if (vm->pasid == pasid)
     return 0;

if (vm->pasid)
     xa_erase_irq(..., vm->pasid);

if (pasid)
     xa_store_irq(..., pasid, vm);

vm->pasid = pasid;

You of course need to add error handling and everything, but in general 
that should do it.

Thanks,
Christian.

>
>
> Regards,
>
> Nirmoy
>
>>
>> Just __xa_erase should be enough.
>>
>> Christian.
>>
>>>
>>>
>>> Regards,
>>>
>>> Nirmoy
>>>
>>>
>>>>
>>>> Christian
>>>>
>>>>> +    }
>>>>> +
>>>>> +    vm->pasid = pasid;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>>   /*
>>>>>    * vm eviction_lock can be taken in MMU notifiers. Make sure no 
>>>>> reclaim-FS
>>>>>    * happens while holding this lock anywhere to prevent deadlocks 
>>>>> when
>>>>> @@ -2940,18 +2973,9 @@ int amdgpu_vm_init(struct amdgpu_device 
>>>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>>>         amdgpu_bo_unreserve(vm->root.bo);
>>>>>   -    if (pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>>> pasid + 1,
>>>>> -                  GFP_ATOMIC);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -        if (r < 0)
>>>>> -            goto error_free_root;
>>>>> -
>>>>> -        vm->pasid = pasid;
>>>>> -    }
>>>>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>>>>> +    if (r)
>>>>> +        goto error_free_root;
>>>>>         INIT_KFIFO(vm->faults);
>>>>>   @@ -3039,18 +3063,11 @@ int amdgpu_vm_make_compute(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>       if (r)
>>>>>           goto unreserve_bo;
>>>>>   -    if (pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>>> pasid + 1,
>>>>> -                  GFP_ATOMIC);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -
>>>>> -        if (r == -ENOSPC)
>>>>> -            goto unreserve_bo;
>>>>> -        r = 0;
>>>>> -    }
>>>>> +    /* remove previous {pasid:vm} entry first */
>>>>> +    r = amdgpu_vm_set_pasid(adev, vm, 0);
>>>>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>>>>> +    if (r)
>>>>> +        goto unreserve_bo;
>>>>>         /* Check if PD needs to be reinitialized and do it before
>>>>>        * changing any other state, in case it fails.
>>>>> @@ -3061,7 +3078,7 @@ int amdgpu_vm_make_compute(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>                          to_amdgpu_bo_vm(vm->root.bo),
>>>>>                          false);
>>>>>           if (r)
>>>>> -            goto free_idr;
>>>>> +            goto free_pasid_entry;
>>>>>       }
>>>>>         /* Update VM state */
>>>>> @@ -3078,7 +3095,7 @@ int amdgpu_vm_make_compute(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>           r = amdgpu_bo_sync_wait(vm->root.bo,
>>>>>                       AMDGPU_FENCE_OWNER_UNDEFINED, true);
>>>>>           if (r)
>>>>> -            goto free_idr;
>>>>> +            goto free_pasid_entry;
>>>>>             vm->update_funcs = &amdgpu_vm_cpu_funcs;
>>>>>       } else {
>>>>> @@ -3088,31 +3105,14 @@ int amdgpu_vm_make_compute(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>       vm->last_update = NULL;
>>>>>       vm->is_compute_context = true;
>>>>>   -    if (vm->pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -        vm->pasid = 0;
>>>>> -    }
>>>>> -
>>>>>       /* Free the shadow bo for compute VM */
>>>>> amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
>>>>>   -    if (pasid)
>>>>> -        vm->pasid = pasid;
>>>>> -
>>>>>       goto unreserve_bo;
>>>>>   -free_idr:
>>>>> -    if (pasid) {
>>>>> -        unsigned long flags;
>>>>> +free_pasid_entry:
>>>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -    }
>>>>>   unreserve_bo:
>>>>>       amdgpu_bo_unreserve(vm->root.bo);
>>>>>       return r;
>>>>> @@ -3128,14 +3128,7 @@ int amdgpu_vm_make_compute(struct 
>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>    */
>>>>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, 
>>>>> struct amdgpu_vm *vm)
>>>>>   {
>>>>> -    if (vm->pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -    }
>>>>> -    vm->pasid = 0;
>>>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>>>       vm->is_compute_context = false;
>>>>>   }
>>>>>   @@ -3159,15 +3152,7 @@ void amdgpu_vm_fini(struct amdgpu_device 
>>>>> *adev, struct amdgpu_vm *vm)
>>>>>         root = amdgpu_bo_ref(vm->root.bo);
>>>>>       amdgpu_bo_reserve(root, true);
>>>>> -    if (vm->pasid) {
>>>>> -        unsigned long flags;
>>>>> -
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> -        vm->pasid = 0;
>>>>> -    }
>>>>> -
>>>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>>>       dma_fence_wait(vm->last_unlocked, false);
>>>>>       dma_fence_put(vm->last_unlocked);
>>>>>   @@ -3249,8 +3234,7 @@ void amdgpu_vm_manager_init(struct 
>>>>> amdgpu_device *adev)
>>>>>       adev->vm_manager.vm_update_mode = 0;
>>>>>   #endif
>>>>>   -    idr_init(&adev->vm_manager.pasid_idr);
>>>>> -    spin_lock_init(&adev->vm_manager.pasid_lock);
>>>>> +    xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
>>>>>   }
>>>>>     /**
>>>>> @@ -3262,8 +3246,8 @@ void amdgpu_vm_manager_init(struct 
>>>>> amdgpu_device *adev)
>>>>>    */
>>>>>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>>>>>   {
>>>>> - WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
>>>>> -    idr_destroy(&adev->vm_manager.pasid_idr);
>>>>> +    WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>>>>> +    xa_destroy(&adev->vm_manager.pasids);
>>>>>         amdgpu_vmid_mgr_fini(adev);
>>>>>   }
>>>>> @@ -3332,13 +3316,13 @@ void amdgpu_vm_get_task_info(struct 
>>>>> amdgpu_device *adev, u32 pasid,
>>>>>       struct amdgpu_vm *vm;
>>>>>       unsigned long flags;
>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>>   -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>       if (vm)
>>>>>           *task_info = vm->task_info;
>>>>>   - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>>>   }
>>>>>     /**
>>>>> @@ -3380,15 +3364,15 @@ bool amdgpu_vm_handle_fault(struct 
>>>>> amdgpu_device *adev, u32 pasid,
>>>>>       struct amdgpu_vm *vm;
>>>>>       int r;
>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>       if (vm) {
>>>>>           root = amdgpu_bo_ref(vm->root.bo);
>>>>>           is_compute_context = vm->is_compute_context;
>>>>>       } else {
>>>>>           root = NULL;
>>>>>       }
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>>>>         if (!root)
>>>>>           return false;
>>>>> @@ -3406,11 +3390,11 @@ bool amdgpu_vm_handle_fault(struct 
>>>>> amdgpu_device *adev, u32 pasid,
>>>>>           goto error_unref;
>>>>>         /* Double check that the VM still exists */
>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>       if (vm && vm->root.bo != root)
>>>>>           vm = NULL;
>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>> +    xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>>>>       if (!vm)
>>>>>           goto error_unlock;
>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> index ddb85a85cbba..31c467764162 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>> @@ -359,8 +359,7 @@ struct amdgpu_vm_manager {
>>>>>       /* PASID to VM mapping, will be used in interrupt context to
>>>>>        * look up VM of a page fault
>>>>>        */
>>>>> -    struct idr                pasid_idr;
>>>>> -    spinlock_t                pasid_lock;
>>>>> +    struct xarray                pasids;
>>>>>   };
>>>>>     struct amdgpu_bo_va_mapping;
>>>>
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm
  2021-06-23 15:02           ` Christian König
@ 2021-06-23 15:24             ` Das, Nirmoy
  0 siblings, 0 replies; 11+ messages in thread
From: Das, Nirmoy @ 2021-06-23 15:24 UTC (permalink / raw)
  To: Christian König, Christian König, amd-gfx; +Cc: Felix.Kuehling


On 6/23/2021 5:02 PM, Christian König wrote:
>
>
> Am 23.06.21 um 16:54 schrieb Das, Nirmoy:
>>
>> On 6/23/2021 3:40 PM, Christian König wrote:
>>>
>>>
>>> Am 23.06.21 um 15:11 schrieb Das, Nirmoy:
>>>>
>>>> On 6/23/2021 2:50 PM, Christian König wrote:
>>>>>
>>>>>
>>>>> Am 23.06.21 um 14:25 schrieb Nirmoy Das:
>>>>>> Replace idr with xarray as we actually need hash functionality.
>>>>>> Cleanup code related to vm pasid by adding helper function.
>>>>>>
>>>>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 134 
>>>>>> +++++++++++--------------
>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |   3 +-
>>>>>>   2 files changed, 60 insertions(+), 77 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index be841d62a1d4..e047e56a4be2 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -87,6 +87,39 @@ struct amdgpu_prt_cb {
>>>>>>       struct dma_fence_cb cb;
>>>>>>   };
>>>>>>   +static int amdgpu_vm_set_pasid(struct amdgpu_device *adev,
>>>>>> +                   struct amdgpu_vm *vm,
>>>>>> +                   unsigned long pasid)
>>>>>
>>>>> Some kerneldoc please describing why we have that function.
>>>>
>>>>
>>>> Alright.
>>>>
>>>>
>>>>>
>>>>>> +{
>>>>>> +    unsigned long flags;
>>>>>> +    int r;
>>>>>> +
>>>>>> +    if (pasid) {
>>>>>
>>>>> You should probably reorder the code so that the old pasid is 
>>>>> first removed and then eventually the new one added.
>>>>>
>>>>>> + xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>>> +        r = xa_err(__xa_store(&adev->vm_manager.pasids, pasid, vm,
>>>>>> +                      GFP_ATOMIC));
>>>>>> + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>>>
>>>>> As far as I can see this can just use xa_store() which also drops 
>>>>> the need for GFP_ATOMIC here.
>>>>
>>>>
>>>> Do we need to have this irqsave/restore to keep passids safe for 
>>>> amdgpu_vm_handle_fault() ?
>>>
>>> No, we need the VM safe not the pasid.
>>
>>
>> Would spin_lock_irq be enough to keep the VM safe then I can use 
>> xa_store_irq() and remove some extra locking code?
>
> Yes, when when amdgpu_vm_set_pasid() is called we can be 100% sure 
> that the VM won't be freed inside the function or otherwise I really 
> question the saneness of the code.
>
>>>
>>>> xa_store() takes the spinlock without irq flags so I wanted to keep 
>>>> old behavior.
>>>
>>> Yeah, that's indeed problematic. You need to keep that straight or 
>>> lockdep will complain.
>>>
>>> IIRC there is also a function to reserve an entry before you take 
>>> the lock. Maybe use that one.
>>
>>
>> xa_reserve() also takes a spin lock so I think this won't work either 
>> with gfp_kernel flag.
>
> No, I just double checked. You can use xa_store_irq() with GFP_KERNEL.
>
> The implementation makes sure to not allocate while holding the lock.
>
>>
>>
>>>
>>>>
>>>>
>>>>>
>>>>>> +        if (r < 0)
>>>>>> +            return r;
>>>>>> +    } else {
>>>>>> +        unsigned long index;
>>>>>> +        struct amdgpu_vm *res;
>>>>>> +
>>>>>> + xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>>> +        xa_for_each(&adev->vm_manager.pasids, index, res) {
>>>>>> +            if (res == vm) {
>>>>>> + __xa_erase(&adev->vm_manager.pasids, index);
>>>>>> +                break;
>>>>>> +            }
>>>>>> +        }
>>>>>> + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>>>
>>>>> That is really ugly, why is that necessary?
>>>>
>>>> Do you mean the lock around xa_for_each() ? I think I can just used 
>>>> lock around __xa_erase() or just xa_erase() if just simple spinlock 
>>>> without flags is enough.
>>>
>>> I mean why you use xa_for_each() here?
>>
>>
>> amdgpu_vm_set_pasid() removes pasid:vm entry when pasid 0 is passed. 
>> I need xa_for_each()  to find the index of that vm pointer
>>
>> so that I can pass that to __xa_erase(). I couldn't find an API which 
>> removes an entry  by value.
>
> Ok sounds like you don't understand what semantics I suggest. Let me 
> try once more:
>
> if (vm->pasid == pasid)
>     return 0;
>
> if (vm->pasid)
>     xa_erase_irq(..., vm->pasid);
>
> if (pasid)
>     xa_store_irq(..., pasid, vm);
>
> vm->pasid = pasid;
>
> You of course need to add error handling and everything, but in 
> general that should do it.


I guess I was over thinking. This looks much straight forward than what 
I was thinking.


Thanks,

Nirmoy


>
> Thanks,
> Christian.
>
>>
>>
>> Regards,
>>
>> Nirmoy
>>
>>>
>>> Just __xa_erase should be enough.
>>>
>>> Christian.
>>>
>>>>
>>>>
>>>> Regards,
>>>>
>>>> Nirmoy
>>>>
>>>>
>>>>>
>>>>> Christian
>>>>>
>>>>>> +    }
>>>>>> +
>>>>>> +    vm->pasid = pasid;
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>>   /*
>>>>>>    * vm eviction_lock can be taken in MMU notifiers. Make sure no 
>>>>>> reclaim-FS
>>>>>>    * happens while holding this lock anywhere to prevent 
>>>>>> deadlocks when
>>>>>> @@ -2940,18 +2973,9 @@ int amdgpu_vm_init(struct amdgpu_device 
>>>>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>>>>         amdgpu_bo_unreserve(vm->root.bo);
>>>>>>   -    if (pasid) {
>>>>>> -        unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>>>> pasid + 1,
>>>>>> -                  GFP_ATOMIC);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> -        if (r < 0)
>>>>>> -            goto error_free_root;
>>>>>> -
>>>>>> -        vm->pasid = pasid;
>>>>>> -    }
>>>>>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>>>>>> +    if (r)
>>>>>> +        goto error_free_root;
>>>>>>         INIT_KFIFO(vm->faults);
>>>>>>   @@ -3039,18 +3063,11 @@ int amdgpu_vm_make_compute(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>       if (r)
>>>>>>           goto unreserve_bo;
>>>>>>   -    if (pasid) {
>>>>>> -        unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>>>> pasid + 1,
>>>>>> -                  GFP_ATOMIC);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> -
>>>>>> -        if (r == -ENOSPC)
>>>>>> -            goto unreserve_bo;
>>>>>> -        r = 0;
>>>>>> -    }
>>>>>> +    /* remove previous {pasid:vm} entry first */
>>>>>> +    r = amdgpu_vm_set_pasid(adev, vm, 0);
>>>>>> +    r = amdgpu_vm_set_pasid(adev, vm, pasid);
>>>>>> +    if (r)
>>>>>> +        goto unreserve_bo;
>>>>>>         /* Check if PD needs to be reinitialized and do it before
>>>>>>        * changing any other state, in case it fails.
>>>>>> @@ -3061,7 +3078,7 @@ int amdgpu_vm_make_compute(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>> to_amdgpu_bo_vm(vm->root.bo),
>>>>>>                          false);
>>>>>>           if (r)
>>>>>> -            goto free_idr;
>>>>>> +            goto free_pasid_entry;
>>>>>>       }
>>>>>>         /* Update VM state */
>>>>>> @@ -3078,7 +3095,7 @@ int amdgpu_vm_make_compute(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>           r = amdgpu_bo_sync_wait(vm->root.bo,
>>>>>>                       AMDGPU_FENCE_OWNER_UNDEFINED, true);
>>>>>>           if (r)
>>>>>> -            goto free_idr;
>>>>>> +            goto free_pasid_entry;
>>>>>>             vm->update_funcs = &amdgpu_vm_cpu_funcs;
>>>>>>       } else {
>>>>>> @@ -3088,31 +3105,14 @@ int amdgpu_vm_make_compute(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>       vm->last_update = NULL;
>>>>>>       vm->is_compute_context = true;
>>>>>>   -    if (vm->pasid) {
>>>>>> -        unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> -        vm->pasid = 0;
>>>>>> -    }
>>>>>> -
>>>>>>       /* Free the shadow bo for compute VM */
>>>>>> amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
>>>>>>   -    if (pasid)
>>>>>> -        vm->pasid = pasid;
>>>>>> -
>>>>>>       goto unreserve_bo;
>>>>>>   -free_idr:
>>>>>> -    if (pasid) {
>>>>>> -        unsigned long flags;
>>>>>> +free_pasid_entry:
>>>>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> -    }
>>>>>>   unreserve_bo:
>>>>>>       amdgpu_bo_unreserve(vm->root.bo);
>>>>>>       return r;
>>>>>> @@ -3128,14 +3128,7 @@ int amdgpu_vm_make_compute(struct 
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>    */
>>>>>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, 
>>>>>> struct amdgpu_vm *vm)
>>>>>>   {
>>>>>> -    if (vm->pasid) {
>>>>>> -        unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> -    }
>>>>>> -    vm->pasid = 0;
>>>>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>>>>       vm->is_compute_context = false;
>>>>>>   }
>>>>>>   @@ -3159,15 +3152,7 @@ void amdgpu_vm_fini(struct amdgpu_device 
>>>>>> *adev, struct amdgpu_vm *vm)
>>>>>>         root = amdgpu_bo_ref(vm->root.bo);
>>>>>>       amdgpu_bo_reserve(root, true);
>>>>>> -    if (vm->pasid) {
>>>>>> -        unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> -        idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> -        vm->pasid = 0;
>>>>>> -    }
>>>>>> -
>>>>>> +    amdgpu_vm_set_pasid(adev, vm, 0);
>>>>>>       dma_fence_wait(vm->last_unlocked, false);
>>>>>>       dma_fence_put(vm->last_unlocked);
>>>>>>   @@ -3249,8 +3234,7 @@ void amdgpu_vm_manager_init(struct 
>>>>>> amdgpu_device *adev)
>>>>>>       adev->vm_manager.vm_update_mode = 0;
>>>>>>   #endif
>>>>>>   -    idr_init(&adev->vm_manager.pasid_idr);
>>>>>> - spin_lock_init(&adev->vm_manager.pasid_lock);
>>>>>> +    xa_init_flags(&adev->vm_manager.pasids, XA_FLAGS_LOCK_IRQ);
>>>>>>   }
>>>>>>     /**
>>>>>> @@ -3262,8 +3246,8 @@ void amdgpu_vm_manager_init(struct 
>>>>>> amdgpu_device *adev)
>>>>>>    */
>>>>>>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
>>>>>>   {
>>>>>> - WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
>>>>>> -    idr_destroy(&adev->vm_manager.pasid_idr);
>>>>>> + WARN_ON(!xa_empty(&adev->vm_manager.pasids));
>>>>>> +    xa_destroy(&adev->vm_manager.pasids);
>>>>>>         amdgpu_vmid_mgr_fini(adev);
>>>>>>   }
>>>>>> @@ -3332,13 +3316,13 @@ void amdgpu_vm_get_task_info(struct 
>>>>>> amdgpu_device *adev, u32 pasid,
>>>>>>       struct amdgpu_vm *vm;
>>>>>>       unsigned long flags;
>>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, flags);
>>>>>>   -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>>       if (vm)
>>>>>>           *task_info = vm->task_info;
>>>>>>   - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
>>>>>>   }
>>>>>>     /**
>>>>>> @@ -3380,15 +3364,15 @@ bool amdgpu_vm_handle_fault(struct 
>>>>>> amdgpu_device *adev, u32 pasid,
>>>>>>       struct amdgpu_vm *vm;
>>>>>>       int r;
>>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>>>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>>       if (vm) {
>>>>>>           root = amdgpu_bo_ref(vm->root.bo);
>>>>>>           is_compute_context = vm->is_compute_context;
>>>>>>       } else {
>>>>>>           root = NULL;
>>>>>>       }
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>>> + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>>>>>         if (!root)
>>>>>>           return false;
>>>>>> @@ -3406,11 +3390,11 @@ bool amdgpu_vm_handle_fault(struct 
>>>>>> amdgpu_device *adev, u32 pasid,
>>>>>>           goto error_unref;
>>>>>>         /* Double check that the VM still exists */
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>> +    xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
>>>>>> +    vm = xa_load(&adev->vm_manager.pasids, pasid);
>>>>>>       if (vm && vm->root.bo != root)
>>>>>>           vm = NULL;
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>>> + xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
>>>>>>       if (!vm)
>>>>>>           goto error_unlock;
>>>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> index ddb85a85cbba..31c467764162 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> @@ -359,8 +359,7 @@ struct amdgpu_vm_manager {
>>>>>>       /* PASID to VM mapping, will be used in interrupt context to
>>>>>>        * look up VM of a page fault
>>>>>>        */
>>>>>> -    struct idr                pasid_idr;
>>>>>> -    spinlock_t                pasid_lock;
>>>>>> +    struct xarray                pasids;
>>>>>>   };
>>>>>>     struct amdgpu_bo_va_mapping;
>>>>>
>>>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm
  2021-06-23 12:25 [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm Nirmoy Das
  2021-06-23 12:25 ` [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm Nirmoy Das
@ 2021-06-23 12:46 ` Christian König
  2021-06-23 19:05 ` Felix Kuehling
  2 siblings, 0 replies; 11+ messages in thread
From: Christian König @ 2021-06-23 12:46 UTC (permalink / raw)
  To: Nirmoy Das, amd-gfx; +Cc: Felix.Kuehling, Christian.Koenig

Am 23.06.21 um 14:25 schrieb Nirmoy Das:
> VM code should not be responsible for freeing pasid as pasid
> gets allocated outside of VM code, before initializing a vm.
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           | 5 -----
>   2 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index f96598279593..42e22b1fdfee 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1287,6 +1287,12 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
>   	if (avm->process_info)
>   		return -EINVAL;
>   
> +	/* Free the original amdgpu allocated pasid,
> +	 * will be replaced with kfd allocated pasid
> +	 */
> +	if (avm->pasid)
> +		amdgpu_pasid_free(avm->pasid);
> +
>   	/* Convert VM into a compute VM */
>   	ret = amdgpu_vm_make_compute(adev, avm, pasid);
>   	if (ret)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 63975bda8e76..be841d62a1d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -3094,11 +3094,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>   		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>   		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
> -
> -		/* Free the original amdgpu allocated pasid
> -		 * Will be replaced with kfd allocated pasid
> -		 */
> -		amdgpu_pasid_free(vm->pasid);
>   		vm->pasid = 0;
>   	}
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm
  2021-06-23 12:25 [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm Nirmoy Das
  2021-06-23 12:25 ` [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm Nirmoy Das
  2021-06-23 12:46 ` [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm Christian König
@ 2021-06-23 19:05 ` Felix Kuehling
  2021-06-29  7:48   ` Das, Nirmoy
  2 siblings, 1 reply; 11+ messages in thread
From: Felix Kuehling @ 2021-06-23 19:05 UTC (permalink / raw)
  To: Nirmoy Das, amd-gfx; +Cc: Christian.Koenig

On 2021-06-23 8:25 a.m., Nirmoy Das wrote:
> VM code should not be responsible for freeing pasid as pasid
> gets allocated outside of VM code, before initializing a vm.
>
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 ++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           | 5 -----
>   2 files changed, 6 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index f96598279593..42e22b1fdfee 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1287,6 +1287,12 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
>   	if (avm->process_info)
>   		return -EINVAL;
>   
> +	/* Free the original amdgpu allocated pasid,
> +	 * will be replaced with kfd allocated pasid
> +	 */
> +	if (avm->pasid)
> +		amdgpu_pasid_free(avm->pasid);
> +

If amdgpu_vm_make_compute fails, amdgpu_driver_postclose_kms will try to 
free the PASID a second time because vm->pasid still points to it. You 
could fix that by moving the amdgpu_pasid_free after a successful 
amdgpu_vm_make_compute call. But that still means you're making an 
assumption about the side effect of amdgpu_vm_make_compute on vm->pasid. 
IMHO, splitting the amdgpu_pasid_free and the vm->pasid = 0 into two 
different functions is a mistake.

Regards,
   Felix


>   	/* Convert VM into a compute VM */
>   	ret = amdgpu_vm_make_compute(adev, avm, pasid);
>   	if (ret)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 63975bda8e76..be841d62a1d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -3094,11 +3094,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>   		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>   		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>   		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
> -
> -		/* Free the original amdgpu allocated pasid
> -		 * Will be replaced with kfd allocated pasid
> -		 */
> -		amdgpu_pasid_free(vm->pasid);
>   		vm->pasid = 0;
>   	}
>   
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm
  2021-06-23 19:05 ` Felix Kuehling
@ 2021-06-29  7:48   ` Das, Nirmoy
  0 siblings, 0 replies; 11+ messages in thread
From: Das, Nirmoy @ 2021-06-29  7:48 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx; +Cc: Christian.Koenig


On 6/23/2021 9:05 PM, Felix Kuehling wrote:
> On 2021-06-23 8:25 a.m., Nirmoy Das wrote:
>> VM code should not be responsible for freeing pasid as pasid
>> gets allocated outside of VM code, before initializing a vm.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 ++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           | 5 -----
>>   2 files changed, 6 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index f96598279593..42e22b1fdfee 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -1287,6 +1287,12 @@ int 
>> amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
>>       if (avm->process_info)
>>           return -EINVAL;
>>   +    /* Free the original amdgpu allocated pasid,
>> +     * will be replaced with kfd allocated pasid
>> +     */
>> +    if (avm->pasid)
>> +        amdgpu_pasid_free(avm->pasid);
>> +
>
> If amdgpu_vm_make_compute fails, amdgpu_driver_postclose_kms will try 
> to free the PASID a second time because vm->pasid still points to it. 
> You could fix that by moving the amdgpu_pasid_free after a successful 
> amdgpu_vm_make_compute call. But that still means you're making an 
> assumption about the side effect of amdgpu_vm_make_compute on 
> vm->pasid. IMHO, splitting the amdgpu_pasid_free and the vm->pasid = 0 
> into two different functions is a mistake.


Makes sense, I think I can export amdgpu_vm_set_pasid() and then keep 
amdgpu_pasid_free and the vm->pasid = 0 together.


Regards,

Nirmoy

>
> Regards,
>   Felix
>
>
>>       /* Convert VM into a compute VM */
>>       ret = amdgpu_vm_make_compute(adev, avm, pasid);
>>       if (ret)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 63975bda8e76..be841d62a1d4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -3094,11 +3094,6 @@ int amdgpu_vm_make_compute(struct 
>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>           spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>           idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>> spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>> -
>> -        /* Free the original amdgpu allocated pasid
>> -         * Will be replaced with kfd allocated pasid
>> -         */
>> -        amdgpu_pasid_free(vm->pasid);
>>           vm->pasid = 0;
>>       }
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-06-29  7:48 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-23 12:25 [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm Nirmoy Das
2021-06-23 12:25 ` [PATCH 2/2] drm/amdgpu: use xarray for storing pasid in vm Nirmoy Das
2021-06-23 12:50   ` Christian König
2021-06-23 13:11     ` Das, Nirmoy
2021-06-23 13:40       ` Christian König
2021-06-23 14:54         ` Das, Nirmoy
2021-06-23 15:02           ` Christian König
2021-06-23 15:24             ` Das, Nirmoy
2021-06-23 12:46 ` [PATCH 1/2] drm/amdgpu: free pasid early before converting a vm Christian König
2021-06-23 19:05 ` Felix Kuehling
2021-06-29  7:48   ` Das, Nirmoy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.