amd-gfx.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdkfd: Add SVM range mapped_to_gpu flag
@ 2022-04-20  0:47 Philip Yang
  2022-04-20  0:47 ` [PATCH 2/2] drm/amdkfd: Update mapping if range attributes changed Philip Yang
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Philip Yang @ 2022-04-20  0:47 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang, Felix.Kuehling

Add a flag to avoid unnecessarily unmapping an SVM range from GPUs when
migrating the range, if the range is not currently mapped on any GPU.
This flag will also be used to decide whether to flush TLBs when
updating an existing mapping on GPUs.

It is protected by prange->migrate_mutex and mmap read lock in MMU
notifier callback.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 ++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 +
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 5afe216cf099..6be1695f3a09 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -951,6 +951,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
 	new->prefetch_loc = old->prefetch_loc;
 	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
+	new->mapped_to_gpu = old->mapped_to_gpu;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
 
@@ -1204,6 +1205,17 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 	uint32_t gpuidx;
 	int r = 0;
 
+	if (!prange->mapped_to_gpu) {
+		pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
+			 prange, prange->start, prange->last);
+		return 0;
+	}
+
+	if (prange->start == start && prange->last == last) {
+		pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
+		prange->mapped_to_gpu = false;
+	}
+
 	bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
 		  MAX_GPU_INSTANCE);
 	p = container_of(prange->svms, struct kfd_process, svms);
@@ -1590,8 +1602,10 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		addr = next;
 	}
 
-	if (addr == end)
+	if (addr == end) {
 		prange->validated_once = true;
+		prange->mapped_to_gpu = true;
+	}
 
 unreserve_out:
 	svm_range_unreserve_bos(&ctx);
@@ -1822,6 +1836,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 	new->prefetch_loc = old->prefetch_loc;
 	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
+	new->mapped_to_gpu = old->mapped_to_gpu;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 66c77f00ac3e..2d54147b4dda 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -133,6 +133,7 @@ struct svm_range {
 	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
 	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
 	bool				validated_once;
+	bool				mapped_to_gpu;
 };
 
 static inline void svm_range_lock(struct svm_range *prange)
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] drm/amdkfd: Update mapping if range attributes changed
  2022-04-20  0:47 [PATCH 1/2] drm/amdkfd: Add SVM range mapped_to_gpu flag Philip Yang
@ 2022-04-20  0:47 ` Philip Yang
  2022-04-21 20:17   ` Felix Kuehling
  2022-04-22 14:06 ` [PATCH v2 " Philip Yang
  2022-04-25 13:48 ` [PATCH 1/2] drm/amdkfd: Add SVM range mapped_to_gpu flag Felix Kuehling
  2 siblings, 1 reply; 8+ messages in thread
From: Philip Yang @ 2022-04-20  0:47 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang, Felix.Kuehling

Changing SVM range mapping flags or access attributes doesn't trigger
migration; if the range is already mapped on GPUs we should update the
GPU mapping and pass the flush_tlb flag to amdgpu vm.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41 ++++++++++++++++++----------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 6be1695f3a09..0a9bdadf875e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
 
 static void
 svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
-		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+		      bool *update_mapping)
 {
 	uint32_t i;
 	int gpuidx;
@@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
 		case KFD_IOCTL_SVM_ATTR_ACCESS:
 		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
 		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+			*update_mapping = true;
 			gpuidx = kfd_process_gpuidx_from_gpuid(p,
 							       attrs[i].value);
 			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
@@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
 			}
 			break;
 		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+			*update_mapping = true;
 			prange->flags |= attrs[i].value;
 			break;
 		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+			*update_mapping = true;
 			prange->flags &= ~attrs[i].value;
 			break;
 		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
@@ -1253,7 +1257,7 @@ static int
 svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
 		     unsigned long offset, unsigned long npages, bool readonly,
 		     dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
-		     struct dma_fence **fence)
+		     struct dma_fence **fence, bool flush_tlb)
 {
 	struct amdgpu_device *adev = pdd->dev->adev;
 	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
@@ -1291,7 +1295,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
 			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
 			 pte_flags);
 
-		r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
+		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
 					   last_start, prange->start + i,
 					   pte_flags,
 					   last_start - prange->start,
@@ -1325,7 +1329,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
 static int
 svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
 		      unsigned long npages, bool readonly,
-		      unsigned long *bitmap, bool wait)
+		      unsigned long *bitmap, bool wait, bool flush_tlb)
 {
 	struct kfd_process_device *pdd;
 	struct amdgpu_device *bo_adev;
@@ -1360,7 +1364,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
 
 		r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
 					 prange->dma_addr[gpuidx],
-					 bo_adev, wait ? &fence : NULL);
+					 bo_adev, wait ? &fence : NULL,
+					 flush_tlb);
 		if (r)
 			break;
 
@@ -1481,8 +1486,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
-				      struct svm_range *prange,
-				      int32_t gpuidx, bool intr, bool wait)
+				      struct svm_range *prange, int32_t gpuidx,
+				      bool intr, bool wait, bool flush_tlb)
 {
 	struct svm_validate_context ctx;
 	unsigned long start, end, addr;
@@ -1521,8 +1526,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			  prange->bitmap_aip, MAX_GPU_INSTANCE);
 	}
 
-	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
-		return 0;
+	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
+		if (!prange->mapped_to_gpu)
+			return 0;
+
+		bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+	}
 
 	if (prange->actual_loc && !prange->ttm_res) {
 		/* This should never happen. actual_loc gets set by
@@ -1594,7 +1603,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 
 		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-					  ctx.bitmap, wait);
+					  ctx.bitmap, wait, flush_tlb);
 
 unlock_out:
 		svm_range_unlock(prange);
@@ -1690,7 +1699,7 @@ static void svm_range_restore_work(struct work_struct *work)
 		mutex_lock(&prange->migrate_mutex);
 
 		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       false, true);
+					       false, true, false);
 		if (r)
 			pr_debug("failed %d to map 0x%lx to gpus\n", r,
 				 prange->start);
@@ -2828,7 +2837,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		}
 	}
 
-	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
+	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
 	if (r)
 		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
 			 r, svms, prange->start, prange->last);
@@ -3241,6 +3250,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 	struct svm_range_list *svms;
 	struct svm_range *prange;
 	struct svm_range *next;
+	bool update_mapping = false;
+	bool flush_tlb;
 	int r = 0;
 
 	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
@@ -3279,7 +3290,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 		svm_range_add_notifier_locked(mm, prange);
 	}
 	list_for_each_entry(prange, &update_list, update_list) {
-		svm_range_apply_attrs(p, prange, nattr, attrs);
+		svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
 		/* TODO: unmap ranges from GPU that lost access */
 	}
 	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
@@ -3312,8 +3323,10 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 			continue;
 		}
 
+		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
+
 		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       true, true);
+					       true, true, flush_tlb);
 		if (r)
 			pr_debug("failed %d to map svm range\n", r);
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: Update mapping if range attributes changed
  2022-04-20  0:47 ` [PATCH 2/2] drm/amdkfd: Update mapping if range attributes changed Philip Yang
@ 2022-04-21 20:17   ` Felix Kuehling
  2022-04-22 12:54     ` philip yang
  0 siblings, 1 reply; 8+ messages in thread
From: Felix Kuehling @ 2022-04-21 20:17 UTC (permalink / raw)
  To: Philip Yang, amd-gfx

Am 2022-04-19 um 20:47 schrieb Philip Yang:
> Change SVM range mapping flags or access attributes don't trigger
> migration, if range is already mapped on GPUs we should update GPU
> mapping, and pass flush_tlb flag to amdgpu vm.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 41 ++++++++++++++++++----------
>   1 file changed, 27 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 6be1695f3a09..0a9bdadf875e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
>   
>   static void
>   svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
> -		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
> +		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
> +		      bool *update_mapping)
>   {
>   	uint32_t i;
>   	int gpuidx;
> @@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>   		case KFD_IOCTL_SVM_ATTR_ACCESS:
>   		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
>   		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
> +			*update_mapping = true;
>   			gpuidx = kfd_process_gpuidx_from_gpuid(p,
>   							       attrs[i].value);
>   			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
> @@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>   			}
>   			break;
>   		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
> +			*update_mapping = true;
>   			prange->flags |= attrs[i].value;
>   			break;
>   		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
> +			*update_mapping = true;
>   			prange->flags &= ~attrs[i].value;
>   			break;
>   		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
> @@ -1253,7 +1257,7 @@ static int
>   svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>   		     unsigned long offset, unsigned long npages, bool readonly,
>   		     dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
> -		     struct dma_fence **fence)
> +		     struct dma_fence **fence, bool flush_tlb)
>   {
>   	struct amdgpu_device *adev = pdd->dev->adev;
>   	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
> @@ -1291,7 +1295,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>   			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>   			 pte_flags);
>   
> -		r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
> +		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
>   					   last_start, prange->start + i,
>   					   pte_flags,
>   					   last_start - prange->start,
> @@ -1325,7 +1329,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>   static int
>   svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>   		      unsigned long npages, bool readonly,
> -		      unsigned long *bitmap, bool wait)
> +		      unsigned long *bitmap, bool wait, bool flush_tlb)
>   {
>   	struct kfd_process_device *pdd;
>   	struct amdgpu_device *bo_adev;
> @@ -1360,7 +1364,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>   
>   		r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
>   					 prange->dma_addr[gpuidx],
> -					 bo_adev, wait ? &fence : NULL);
> +					 bo_adev, wait ? &fence : NULL,
> +					 flush_tlb);
>   		if (r)
>   			break;
>   
> @@ -1481,8 +1486,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
>    * 5. Release page table (and SVM BO) reservation
>    */
>   static int svm_range_validate_and_map(struct mm_struct *mm,
> -				      struct svm_range *prange,
> -				      int32_t gpuidx, bool intr, bool wait)
> +				      struct svm_range *prange, int32_t gpuidx,
> +				      bool intr, bool wait, bool flush_tlb)
>   {
>   	struct svm_validate_context ctx;
>   	unsigned long start, end, addr;
> @@ -1521,8 +1526,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>   			  prange->bitmap_aip, MAX_GPU_INSTANCE);
>   	}
>   
> -	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
> -		return 0;
> +	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
> +		if (!prange->mapped_to_gpu)
> +			return 0;
> +
> +		bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
> +	}
>   
>   	if (prange->actual_loc && !prange->ttm_res) {
>   		/* This should never happen. actual_loc gets set by
> @@ -1594,7 +1603,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>   		}
>   
>   		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
> -					  ctx.bitmap, wait);
> +					  ctx.bitmap, wait, flush_tlb);
>   
>   unlock_out:
>   		svm_range_unlock(prange);
> @@ -1690,7 +1699,7 @@ static void svm_range_restore_work(struct work_struct *work)
>   		mutex_lock(&prange->migrate_mutex);
>   
>   		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
> -					       false, true);
> +					       false, true, false);
>   		if (r)
>   			pr_debug("failed %d to map 0x%lx to gpus\n", r,
>   				 prange->start);
> @@ -2828,7 +2837,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   		}
>   	}
>   
> -	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
> +	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
>   	if (r)
>   		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
>   			 r, svms, prange->start, prange->last);
> @@ -3241,6 +3250,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>   	struct svm_range_list *svms;
>   	struct svm_range *prange;
>   	struct svm_range *next;
> +	bool update_mapping = false;
> +	bool flush_tlb;
>   	int r = 0;
>   
>   	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
> @@ -3279,7 +3290,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>   		svm_range_add_notifier_locked(mm, prange);
>   	}
>   	list_for_each_entry(prange, &update_list, update_list) {
> -		svm_range_apply_attrs(p, prange, nattr, attrs);
> +		svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
>   		/* TODO: unmap ranges from GPU that lost access */
>   	}
>   	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
> @@ -3312,8 +3323,10 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>   			continue;
>   		}
>   
> +		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
> +

Can we skip the validate_and_map if update_mapping is false?

Other than that, the series looks good to me.

Regards,
   Felix


>   		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
> -					       true, true);
> +					       true, true, flush_tlb);
>   		if (r)
>   			pr_debug("failed %d to map svm range\n", r);
>   

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: Update mapping if range attributes changed
  2022-04-21 20:17   ` Felix Kuehling
@ 2022-04-22 12:54     ` philip yang
  0 siblings, 0 replies; 8+ messages in thread
From: philip yang @ 2022-04-22 12:54 UTC (permalink / raw)
  To: Felix Kuehling, Philip Yang, amd-gfx

[-- Attachment #1: Type: text/html, Size: 18015 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 2/2] drm/amdkfd: Update mapping if range attributes changed
  2022-04-20  0:47 [PATCH 1/2] drm/amdkfd: Add SVM range mapped_to_gpu flag Philip Yang
  2022-04-20  0:47 ` [PATCH 2/2] drm/amdkfd: Update mapping if range attributes changed Philip Yang
@ 2022-04-22 14:06 ` Philip Yang
  2022-04-22 22:05   ` Felix Kuehling
  2022-04-25 13:48 ` [PATCH 1/2] drm/amdkfd: Add SVM range mapped_to_gpu flag Felix Kuehling
  2 siblings, 1 reply; 8+ messages in thread
From: Philip Yang @ 2022-04-22 14:06 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang, Felix.Kuehling

Changing SVM range mapping flags or access attributes doesn't trigger
migration; if the range is already mapped on GPUs we should update the
GPU mapping and pass flush_tlb=true to amdgpu vm.

Changing the SVM range preferred_loc or migration granularity doesn't
require updating the GPU mapping, so skip the validate_and_map in that
case.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 46 +++++++++++++++++++---------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 8a077cd066a1..e740384df9c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
 
 static void
 svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
-		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
+		      bool *update_mapping)
 {
 	uint32_t i;
 	int gpuidx;
@@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
 		case KFD_IOCTL_SVM_ATTR_ACCESS:
 		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
 		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+			*update_mapping = true;
 			gpuidx = kfd_process_gpuidx_from_gpuid(p,
 							       attrs[i].value);
 			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
@@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
 			}
 			break;
 		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+			*update_mapping = true;
 			prange->flags |= attrs[i].value;
 			break;
 		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+			*update_mapping = true;
 			prange->flags &= ~attrs[i].value;
 			break;
 		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
@@ -1254,7 +1258,7 @@ static int
 svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
 		     unsigned long offset, unsigned long npages, bool readonly,
 		     dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
-		     struct dma_fence **fence)
+		     struct dma_fence **fence, bool flush_tlb)
 {
 	struct amdgpu_device *adev = pdd->dev->adev;
 	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
@@ -1292,7 +1296,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
 			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
 			 pte_flags);
 
-		r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
+		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
 					   last_start, prange->start + i,
 					   pte_flags,
 					   last_start - prange->start,
@@ -1326,7 +1330,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
 static int
 svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
 		      unsigned long npages, bool readonly,
-		      unsigned long *bitmap, bool wait)
+		      unsigned long *bitmap, bool wait, bool flush_tlb)
 {
 	struct kfd_process_device *pdd;
 	struct amdgpu_device *bo_adev;
@@ -1361,7 +1365,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
 
 		r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
 					 prange->dma_addr[gpuidx],
-					 bo_adev, wait ? &fence : NULL);
+					 bo_adev, wait ? &fence : NULL,
+					 flush_tlb);
 		if (r)
 			break;
 
@@ -1482,8 +1487,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
  * 5. Release page table (and SVM BO) reservation
  */
 static int svm_range_validate_and_map(struct mm_struct *mm,
-				      struct svm_range *prange,
-				      int32_t gpuidx, bool intr, bool wait)
+				      struct svm_range *prange, int32_t gpuidx,
+				      bool intr, bool wait, bool flush_tlb)
 {
 	struct svm_validate_context ctx;
 	unsigned long start, end, addr;
@@ -1522,8 +1527,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			  prange->bitmap_aip, MAX_GPU_INSTANCE);
 	}
 
-	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
-		return 0;
+	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
+		if (!prange->mapped_to_gpu)
+			return 0;
+
+		bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+	}
 
 	if (prange->actual_loc && !prange->ttm_res) {
 		/* This should never happen. actual_loc gets set by
@@ -1595,7 +1604,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 
 		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-					  ctx.bitmap, wait);
+					  ctx.bitmap, wait, flush_tlb);
 
 unlock_out:
 		svm_range_unlock(prange);
@@ -1691,7 +1700,7 @@ static void svm_range_restore_work(struct work_struct *work)
 		mutex_lock(&prange->migrate_mutex);
 
 		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       false, true);
+					       false, true, false);
 		if (r)
 			pr_debug("failed %d to map 0x%lx to gpus\n", r,
 				 prange->start);
@@ -2847,7 +2856,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		}
 	}
 
-	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
+	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
 	if (r)
 		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
 			 r, svms, prange->start, prange->last);
@@ -3264,6 +3273,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 	struct svm_range_list *svms;
 	struct svm_range *prange;
 	struct svm_range *next;
+	bool update_mapping = false;
+	bool flush_tlb;
 	int r = 0;
 
 	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
@@ -3302,7 +3313,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 		svm_range_add_notifier_locked(mm, prange);
 	}
 	list_for_each_entry(prange, &update_list, update_list) {
-		svm_range_apply_attrs(p, prange, nattr, attrs);
+		svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
 		/* TODO: unmap ranges from GPU that lost access */
 	}
 	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
@@ -3335,8 +3346,15 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 			continue;
 		}
 
+		if (!migrated && !update_mapping) {
+			mutex_unlock(&prange->migrate_mutex);
+			continue;
+		}
+
+		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
+
 		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
-					       true, true);
+					       true, true, flush_tlb);
 		if (r)
 			pr_debug("failed %d to map svm range\n", r);
 
-- 
2.35.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/2] drm/amdkfd: Update mapping if range attributes changed
  2022-04-22 14:06 ` [PATCH v2 " Philip Yang
@ 2022-04-22 22:05   ` Felix Kuehling
  2022-04-25 13:47     ` Felix Kuehling
  0 siblings, 1 reply; 8+ messages in thread
From: Felix Kuehling @ 2022-04-22 22:05 UTC (permalink / raw)
  To: Philip Yang, amd-gfx

On 2022-04-22 10:06, Philip Yang wrote:
> Change SVM range mapping flags or access attributes don't trigger
> migration, if range is already mapped on GPUs we should update GPU
> mapping and pass flush_tlb flag true to amdgpu vm.
>
> Change SVM range preferred_loc or migration granularity don't need
> update GPU mapping, skip the validate_and_map.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 46 +++++++++++++++++++---------
>   1 file changed, 32 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 8a077cd066a1..e740384df9c7 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
>   
>   static void
>   svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
> -		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
> +		      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
> +		      bool *update_mapping)
>   {
>   	uint32_t i;
>   	int gpuidx;
> @@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>   		case KFD_IOCTL_SVM_ATTR_ACCESS:
>   		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
>   		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
> +			*update_mapping = true;
>   			gpuidx = kfd_process_gpuidx_from_gpuid(p,
>   							       attrs[i].value);
>   			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
> @@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>   			}
>   			break;
>   		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
> +			*update_mapping = true;
>   			prange->flags |= attrs[i].value;
>   			break;
>   		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
> +			*update_mapping = true;
>   			prange->flags &= ~attrs[i].value;
>   			break;
>   		case KFD_IOCTL_SVM_ATTR_GRANULARITY:
> @@ -1254,7 +1258,7 @@ static int
>   svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>   		     unsigned long offset, unsigned long npages, bool readonly,
>   		     dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
> -		     struct dma_fence **fence)
> +		     struct dma_fence **fence, bool flush_tlb)
>   {
>   	struct amdgpu_device *adev = pdd->dev->adev;
>   	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
> @@ -1292,7 +1296,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>   			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>   			 pte_flags);
>   
> -		r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL,
> +		r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
>   					   last_start, prange->start + i,
>   					   pte_flags,
>   					   last_start - prange->start,
> @@ -1326,7 +1330,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
>   static int
>   svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>   		      unsigned long npages, bool readonly,
> -		      unsigned long *bitmap, bool wait)
> +		      unsigned long *bitmap, bool wait, bool flush_tlb)
>   {
>   	struct kfd_process_device *pdd;
>   	struct amdgpu_device *bo_adev;
> @@ -1361,7 +1365,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>   
>   		r = svm_range_map_to_gpu(pdd, prange, offset, npages, readonly,
>   					 prange->dma_addr[gpuidx],
> -					 bo_adev, wait ? &fence : NULL);
> +					 bo_adev, wait ? &fence : NULL,
> +					 flush_tlb);
>   		if (r)
>   			break;
>   
> @@ -1482,8 +1487,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
>    * 5. Release page table (and SVM BO) reservation
>    */
>   static int svm_range_validate_and_map(struct mm_struct *mm,
> -				      struct svm_range *prange,
> -				      int32_t gpuidx, bool intr, bool wait)
> +				      struct svm_range *prange, int32_t gpuidx,
> +				      bool intr, bool wait, bool flush_tlb)
>   {
>   	struct svm_validate_context ctx;
>   	unsigned long start, end, addr;
> @@ -1522,8 +1527,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>   			  prange->bitmap_aip, MAX_GPU_INSTANCE);
>   	}
>   
> -	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
> -		return 0;
> +	if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
> +		if (!prange->mapped_to_gpu)
> +			return 0;
> +
> +		bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
> +	}
>   
>   	if (prange->actual_loc && !prange->ttm_res) {
>   		/* This should never happen. actual_loc gets set by
> @@ -1595,7 +1604,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>   		}
>   
>   		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
> -					  ctx.bitmap, wait);
> +					  ctx.bitmap, wait, flush_tlb);
>   
>   unlock_out:
>   		svm_range_unlock(prange);
> @@ -1691,7 +1700,7 @@ static void svm_range_restore_work(struct work_struct *work)
>   		mutex_lock(&prange->migrate_mutex);
>   
>   		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
> -					       false, true);
> +					       false, true, false);
>   		if (r)
>   			pr_debug("failed %d to map 0x%lx to gpus\n", r,
>   				 prange->start);
> @@ -2847,7 +2856,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>   		}
>   	}
>   
> -	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
> +	r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, false);
>   	if (r)
>   		pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
>   			 r, svms, prange->start, prange->last);
> @@ -3264,6 +3273,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>   	struct svm_range_list *svms;
>   	struct svm_range *prange;
>   	struct svm_range *next;
> +	bool update_mapping = false;
> +	bool flush_tlb;
>   	int r = 0;
>   
>   	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
> @@ -3302,7 +3313,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>   		svm_range_add_notifier_locked(mm, prange);
>   	}
>   	list_for_each_entry(prange, &update_list, update_list) {
> -		svm_range_apply_attrs(p, prange, nattr, attrs);
> +		svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping);
>   		/* TODO: unmap ranges from GPU that lost access */
>   	}
>   	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
> @@ -3335,8 +3346,15 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
>   			continue;
>   		}
>   
> +		if (!migrated && !update_mapping) {
> +			mutex_unlock(&prange->migrate_mutex);
> +			continue;
> +		}
> +
> +		flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
> +

Please check that this doesn't break the XNACK off case. If a new range 
is created, and that does not trigger a migration or any of the 
conditions that set update_mapping, we still need to make sure the GPU 
mapping is created so that a GPU access to the new range won't fault.

Regards,
   Felix


>   		r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
> -					       true, true);
> +					       true, true, flush_tlb);
>   		if (r)
>   			pr_debug("failed %d to map svm range\n", r);
>   

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 2/2] drm/amdkfd: Update mapping if range attributes changed
  2022-04-22 22:05   ` Felix Kuehling
@ 2022-04-25 13:47     ` Felix Kuehling
  0 siblings, 0 replies; 8+ messages in thread
From: Felix Kuehling @ 2022-04-25 13:47 UTC (permalink / raw)
  To: Philip Yang, amd-gfx

Am 2022-04-22 um 18:05 schrieb Felix Kuehling:
> On 2022-04-22 10:06, Philip Yang wrote:
>> Changing SVM range mapping flags or access attributes doesn't trigger
>> migration; if the range is already mapped on GPUs we should update the
>> GPU mapping and pass the flush_tlb flag as true to amdgpu vm.
>>
>> Changing SVM range preferred_loc or migration granularity doesn't
>> require updating the GPU mapping; skip the validate_and_map.
>>
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 46 +++++++++++++++++++---------
>>   1 file changed, 32 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
>> b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 8a077cd066a1..e740384df9c7 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -686,7 +686,8 @@ svm_range_check_attr(struct kfd_process *p,
>>     static void
>>   svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,
>> -              uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
>> +              uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
>> +              bool *update_mapping)
>>   {
>>       uint32_t i;
>>       int gpuidx;
>> @@ -702,6 +703,7 @@ svm_range_apply_attrs(struct kfd_process *p, 
>> struct svm_range *prange,
>>           case KFD_IOCTL_SVM_ATTR_ACCESS:
>>           case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
>>           case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
>> +            *update_mapping = true;
>>               gpuidx = kfd_process_gpuidx_from_gpuid(p,
>>                                      attrs[i].value);
>>               if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) {
>> @@ -716,9 +718,11 @@ svm_range_apply_attrs(struct kfd_process *p, 
>> struct svm_range *prange,
>>               }
>>               break;
>>           case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
>> +            *update_mapping = true;
>>               prange->flags |= attrs[i].value;
>>               break;
>>           case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
>> +            *update_mapping = true;
>>               prange->flags &= ~attrs[i].value;
>>               break;
>>           case KFD_IOCTL_SVM_ATTR_GRANULARITY:
>> @@ -1254,7 +1258,7 @@ static int
>>   svm_range_map_to_gpu(struct kfd_process_device *pdd, struct 
>> svm_range *prange,
>>                unsigned long offset, unsigned long npages, bool 
>> readonly,
>>                dma_addr_t *dma_addr, struct amdgpu_device *bo_adev,
>> -             struct dma_fence **fence)
>> +             struct dma_fence **fence, bool flush_tlb)
>>   {
>>       struct amdgpu_device *adev = pdd->dev->adev;
>>       struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
>> @@ -1292,7 +1296,7 @@ svm_range_map_to_gpu(struct kfd_process_device 
>> *pdd, struct svm_range *prange,
>>                (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
>>                pte_flags);
>>   -        r = amdgpu_vm_update_range(adev, vm, false, false, false, 
>> NULL,
>> +        r = amdgpu_vm_update_range(adev, vm, false, false, 
>> flush_tlb, NULL,
>>                          last_start, prange->start + i,
>>                          pte_flags,
>>                          last_start - prange->start,
>> @@ -1326,7 +1330,7 @@ svm_range_map_to_gpu(struct kfd_process_device 
>> *pdd, struct svm_range *prange,
>>   static int
>>   svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
>>                 unsigned long npages, bool readonly,
>> -              unsigned long *bitmap, bool wait)
>> +              unsigned long *bitmap, bool wait, bool flush_tlb)
>>   {
>>       struct kfd_process_device *pdd;
>>       struct amdgpu_device *bo_adev;
>> @@ -1361,7 +1365,8 @@ svm_range_map_to_gpus(struct svm_range *prange, 
>> unsigned long offset,
>>             r = svm_range_map_to_gpu(pdd, prange, offset, npages, 
>> readonly,
>>                        prange->dma_addr[gpuidx],
>> -                     bo_adev, wait ? &fence : NULL);
>> +                     bo_adev, wait ? &fence : NULL,
>> +                     flush_tlb);
>>           if (r)
>>               break;
>>   @@ -1482,8 +1487,8 @@ static void *kfd_svm_page_owner(struct 
>> kfd_process *p, int32_t gpuidx)
>>    * 5. Release page table (and SVM BO) reservation
>>    */
>>   static int svm_range_validate_and_map(struct mm_struct *mm,
>> -                      struct svm_range *prange,
>> -                      int32_t gpuidx, bool intr, bool wait)
>> +                      struct svm_range *prange, int32_t gpuidx,
>> +                      bool intr, bool wait, bool flush_tlb)
>>   {
>>       struct svm_validate_context ctx;
>>       unsigned long start, end, addr;
>> @@ -1522,8 +1527,12 @@ static int svm_range_validate_and_map(struct 
>> mm_struct *mm,
>>                 prange->bitmap_aip, MAX_GPU_INSTANCE);
>>       }
>>   -    if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE))
>> -        return 0;
>> +    if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) {
>> +        if (!prange->mapped_to_gpu)
>> +            return 0;
>> +
>> +        bitmap_copy(ctx.bitmap, prange->bitmap_access, 
>> MAX_GPU_INSTANCE);
>> +    }
>>         if (prange->actual_loc && !prange->ttm_res) {
>>           /* This should never happen. actual_loc gets set by
>> @@ -1595,7 +1604,7 @@ static int svm_range_validate_and_map(struct 
>> mm_struct *mm,
>>           }
>>             r = svm_range_map_to_gpus(prange, offset, npages, readonly,
>> -                      ctx.bitmap, wait);
>> +                      ctx.bitmap, wait, flush_tlb);
>>     unlock_out:
>>           svm_range_unlock(prange);
>> @@ -1691,7 +1700,7 @@ static void svm_range_restore_work(struct 
>> work_struct *work)
>>           mutex_lock(&prange->migrate_mutex);
>>             r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>> -                           false, true);
>> +                           false, true, false);
>>           if (r)
>>               pr_debug("failed %d to map 0x%lx to gpus\n", r,
>>                    prange->start);
>> @@ -2847,7 +2856,7 @@ svm_range_restore_pages(struct amdgpu_device 
>> *adev, unsigned int pasid,
>>           }
>>       }
>>   -    r = svm_range_validate_and_map(mm, prange, gpuidx, false, false);
>> +    r = svm_range_validate_and_map(mm, prange, gpuidx, false, false, 
>> false);
>>       if (r)
>>           pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
>>                r, svms, prange->start, prange->last);
>> @@ -3264,6 +3273,8 @@ svm_range_set_attr(struct kfd_process *p, 
>> struct mm_struct *mm,
>>       struct svm_range_list *svms;
>>       struct svm_range *prange;
>>       struct svm_range *next;
>> +    bool update_mapping = false;
>> +    bool flush_tlb;
>>       int r = 0;
>>         pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
>> @@ -3302,7 +3313,7 @@ svm_range_set_attr(struct kfd_process *p, 
>> struct mm_struct *mm,
>>           svm_range_add_notifier_locked(mm, prange);
>>       }
>>       list_for_each_entry(prange, &update_list, update_list) {
>> -        svm_range_apply_attrs(p, prange, nattr, attrs);
>> +        svm_range_apply_attrs(p, prange, nattr, attrs, 
>> &update_mapping);
>>           /* TODO: unmap ranges from GPU that lost access */
>>       }
>>       list_for_each_entry_safe(prange, next, &remove_list, 
>> update_list) {
>> @@ -3335,8 +3346,15 @@ svm_range_set_attr(struct kfd_process *p, 
>> struct mm_struct *mm,
>>               continue;
>>           }
>>   +        if (!migrated && !update_mapping) {
>> +            mutex_unlock(&prange->migrate_mutex);
>> +            continue;
>> +        }
>> +
>> +        flush_tlb = !migrated && update_mapping && 
>> prange->mapped_to_gpu;
>> +
>
> Please check that this doesn't break the XNACK off case. If a new 
> range is created, and that does not trigger a migration or any of the 
> conditions that set update_mapping, we still need to make sure the GPU 
> mapping is created so that a GPU access to the new range won't fault.

Thanks for following up offline. For the record, with XNACK off, the 
default is "not accessible". So to create a GPU mapping, the application 
has to set one of the accessibility attributes, which will trigger a 
page table update with your patch.

The patch is

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


>
> Regards,
>   Felix
>
>
>>           r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
>> -                           true, true);
>> +                           true, true, flush_tlb);
>>           if (r)
>>               pr_debug("failed %d to map svm range\n", r);

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] drm/amdkfd: Add SVM range mapped_to_gpu flag
  2022-04-20  0:47 [PATCH 1/2] drm/amdkfd: Add SVM range mapped_to_gpu flag Philip Yang
  2022-04-20  0:47 ` [PATCH 2/2] drm/amdkfd: Update mapping if range attributes changed Philip Yang
  2022-04-22 14:06 ` [PATCH v2 " Philip Yang
@ 2022-04-25 13:48 ` Felix Kuehling
  2 siblings, 0 replies; 8+ messages in thread
From: Felix Kuehling @ 2022-04-25 13:48 UTC (permalink / raw)
  To: Philip Yang, amd-gfx

Am 2022-04-19 um 20:47 schrieb Philip Yang:
> Avoid unnecessarily unmapping the SVM range from GPUs if the range is
> not mapped on GPUs when migrating the range. This flag will also be used
> to flush the TLB when updating the existing mapping on GPUs.
>
> It is protected by prange->migrate_mutex and mmap read lock in MMU
> notifier callback.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 ++++++++++++++++-
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 +
>   2 files changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 5afe216cf099..6be1695f3a09 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -951,6 +951,7 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
>   	new->prefetch_loc = old->prefetch_loc;
>   	new->actual_loc = old->actual_loc;
>   	new->granularity = old->granularity;
> +	new->mapped_to_gpu = old->mapped_to_gpu;
>   	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
>   	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
>   
> @@ -1204,6 +1205,17 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
>   	uint32_t gpuidx;
>   	int r = 0;
>   
> +	if (!prange->mapped_to_gpu) {
> +		pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
> +			 prange, prange->start, prange->last);
> +		return 0;
> +	}
> +
> +	if (prange->start == start && prange->last == last) {
> +		pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
> +		prange->mapped_to_gpu = false;
> +	}
> +
>   	bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
>   		  MAX_GPU_INSTANCE);
>   	p = container_of(prange->svms, struct kfd_process, svms);
> @@ -1590,8 +1602,10 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>   		addr = next;
>   	}
>   
> -	if (addr == end)
> +	if (addr == end) {
>   		prange->validated_once = true;
> +		prange->mapped_to_gpu = true;
> +	}
>   
>   unreserve_out:
>   	svm_range_unreserve_bos(&ctx);
> @@ -1822,6 +1836,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
>   	new->prefetch_loc = old->prefetch_loc;
>   	new->actual_loc = old->actual_loc;
>   	new->granularity = old->granularity;
> +	new->mapped_to_gpu = old->mapped_to_gpu;
>   	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
>   	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index 66c77f00ac3e..2d54147b4dda 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -133,6 +133,7 @@ struct svm_range {
>   	DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
>   	DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
>   	bool				validated_once;
> +	bool				mapped_to_gpu;
>   };
>   
>   static inline void svm_range_lock(struct svm_range *prange)

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-04-25 13:48 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-20  0:47 [PATCH 1/2] drm/amdkfd: Add SVM range mapped_to_gpu flag Philip Yang
2022-04-20  0:47 ` [PATCH 2/2] drm/amdkfd: Update mapping if range attributes changed Philip Yang
2022-04-21 20:17   ` Felix Kuehling
2022-04-22 12:54     ` philip yang
2022-04-22 14:06 ` [PATCH v2 " Philip Yang
2022-04-22 22:05   ` Felix Kuehling
2022-04-25 13:47     ` Felix Kuehling
2022-04-25 13:48 ` [PATCH 1/2] drm/amdkfd: Add SVM range mapped_to_gpu flag Felix Kuehling

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).