* [PATCH 1/2] drm/amdkfd: check access permission to restore retry fault
@ 2021-08-19 14:56 Philip Yang
  2021-08-19 14:56 ` [PATCH 2/2] drm/amdkfd: map SVM range with correct access permission Philip Yang
  2021-08-19 21:12 ` [PATCH 1/2] drm/amdkfd: check access permission to restore retry fault Felix Kuehling
  0 siblings, 2 replies; 5+ messages in thread
From: Philip Yang @ 2021-08-19 14:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang

Check range access permission when restoring a GPU retry fault: if the
faulting address belongs to a VMA, but the VMA does not grant the read
or write permission requested by the GPU, fail to restore the address.
The VM fault event is then passed back to user space.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
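In outline, the check this patch adds in svm_range_restore_pages (a
condensed sketch of the kfd_svm.c hunk below, not the complete code):

	/* deny the restore if the VMA does not grant the access the
	 * GPU requested; the fault is then reported back to user space
	 */
	if (!svm_range_allow_access(mm, addr, rw_fault)) {
		r = -EPERM;
		goto out_unlock_range;
	}
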
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 30 +++++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h   |  5 +++--
 6 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 831f00644460..ff6de40b860c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3347,12 +3347,13 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
  * @adev: amdgpu device pointer
  * @pasid: PASID of the VM
  * @addr: Address of the fault
+ * @rw_fault: 0 is read fault, 1 is write fault
  *
  * Try to gracefully handle a VM fault. Return true if the fault was handled and
  * shouldn't be reported any more.
  */
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-			    uint64_t addr)
+			    uint64_t addr, uint32_t rw_fault)
 {
 	bool is_compute_context = false;
 	struct amdgpu_bo *root;
@@ -3377,7 +3378,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
 	addr /= AMDGPU_GPU_PAGE_SIZE;
 
 	if (is_compute_context &&
-	    !svm_range_restore_pages(adev, pasid, addr)) {
+	    !svm_range_restore_pages(adev, pasid, addr, rw_fault)) {
 		amdgpu_bo_unref(&root);
 		return true;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 80cc9ab2c1d0..1cc574ece180 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -448,7 +448,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
 void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
 			     struct amdgpu_task_info *task_info);
 bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
-			    uint64_t addr);
+			    uint64_t addr, uint32_t rw_fault);
 
 void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 24b781e90bef..994983901006 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -93,6 +93,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
 				       struct amdgpu_iv_entry *entry)
 {
 	bool retry_fault = !!(entry->src_data[1] & 0x80);
+	bool rw_fault = !!(entry->src_data[1] & 0x20);
 	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
 	struct amdgpu_task_info task_info;
 	uint32_t status = 0;
@@ -121,7 +122,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
 		/* Try to handle the recoverable page faults by filling page
 		 * tables
 		 */
-		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, rw_fault))
 			return 1;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 097230b5e946..9a37fd0527a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -506,6 +506,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 				      struct amdgpu_iv_entry *entry)
 {
 	bool retry_fault = !!(entry->src_data[1] & 0x80);
+	bool rw_fault = !!(entry->src_data[1] & 0x20);
 	uint32_t status = 0, cid = 0, rw = 0;
 	struct amdgpu_task_info task_info;
 	struct amdgpu_vmhub *hub;
@@ -536,7 +537,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 		/* Try to handle the recoverable page faults by filling page
 		 * tables
 		 */
-		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
+		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, rw_fault))
 			return 1;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index d4a43c94bcf9..cf1009bb532a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2400,9 +2400,29 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
 		WRITE_ONCE(pdd->faults, pdd->faults + 1);
 }
 
+static bool
+svm_range_allow_access(struct mm_struct *mm, uint64_t addr, uint32_t rw_fault)
+{
+	unsigned long requested = VM_READ;
+	struct vm_area_struct *vma;
+
+	if (rw_fault)
+		requested |= VM_WRITE;
+
+	vma = find_vma(mm, addr << PAGE_SHIFT);
+	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+		pr_debug("address 0x%llx VMA is removed\n", addr);
+		return true;
+	}
+
+	pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
+		vma->vm_flags);
+	return (requested & ~vma->vm_flags) == 0;
+}
+
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
-			uint64_t addr)
+			uint64_t addr, uint32_t rw_fault)
 {
 	struct mm_struct *mm = NULL;
 	struct svm_range_list *svms;
@@ -2440,6 +2460,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 	}
 
 	mmap_read_lock(mm);
+
 retry_write_locked:
 	mutex_lock(&svms->lock);
 	prange = svm_range_from_addr(svms, addr, NULL);
@@ -2484,6 +2505,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 		goto out_unlock_range;
 	}
 
+	if (!svm_range_allow_access(mm, addr, rw_fault)) {
+		pr_debug("fault addr 0x%llx no %s permission\n", addr,
+			rw_fault ? "write" : "read");
+		r = -EPERM;
+		goto out_unlock_range;
+	}
+
 	best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
 	if (best_loc == -1) {
 		pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index e7fc5e8998aa..e77d90de08a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -175,7 +175,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
 			       unsigned long addr, struct svm_range *parent,
 			       struct svm_range *prange);
 int svm_range_restore_pages(struct amdgpu_device *adev,
-			    unsigned int pasid, uint64_t addr);
+			    unsigned int pasid, uint64_t addr, uint32_t rw);
 int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
 void svm_range_add_list_work(struct svm_range_list *svms,
 			     struct svm_range *prange, struct mm_struct *mm,
@@ -210,7 +210,8 @@ static inline void svm_range_list_fini(struct kfd_process *p)
 }
 
 static inline int svm_range_restore_pages(struct amdgpu_device *adev,
-					  unsigned int pasid, uint64_t addr)
+					  unsigned int pasid, uint64_t addr,
+					  uint32_t rw_fault)
 {
 	return -EFAULT;
 }
-- 
2.17.1



* [PATCH 2/2] drm/amdkfd: map SVM range with correct access permission
  2021-08-19 14:56 [PATCH 1/2] drm/amdkfd: check access permission to restore retry fault Philip Yang
@ 2021-08-19 14:56 ` Philip Yang
  2021-08-19 21:32   ` Felix Kuehling
  2021-08-19 21:12 ` [PATCH 1/2] drm/amdkfd: check access permission to restore retry fault Felix Kuehling
  1 sibling, 1 reply; 5+ messages in thread
From: Philip Yang @ 2021-08-19 14:56 UTC (permalink / raw)
  To: amd-gfx; +Cc: Philip Yang

When restoring a retry fault, prefetching a range, or restoring an svm
range after eviction, map the range to the GPU with the correct read or
write access permission.

A range may include multiple VMAs. Update the GPU page table with the
offset into prange and the number of pages for each VMA, according to
that VMA's access permission.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
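In outline, svm_range_validate_and_map now walks the range one VMA at a
time (a condensed sketch of the loop in the kfd_svm.c hunk below):

	for (addr = start; addr < end && !r; addr = next) {
		vma = find_vma(mm, addr);	/* VMA covering addr */
		readonly = !(vma->vm_flags & VM_WRITE);
		next = min(vma->vm_end, end);	/* clamp to range end */
		npages = (next - addr) / PAGE_SIZE;
		offset = (addr - start) / PAGE_SIZE;
		/* get pages, dma map and map to GPUs for this
		 * [offset, offset + npages) slice of the range
		 */
	}
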
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 131 +++++++++++++++++----------
 1 file changed, 84 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index cf1009bb532a..94612581963f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range *prange)
 
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
+		      unsigned long offset, unsigned long npages,
 		      unsigned long *hmm_pfns, uint32_t gpuidx)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		prange->dma_addr[gpuidx] = addr;
 	}
 
-	for (i = 0; i < prange->npages; i++) {
+	addr += offset;
+	for (i = 0; i < npages; i++) {
 		if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
 			      "leaking dma mapping\n"))
 			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
@@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+		  unsigned long offset, unsigned long npages,
 		  unsigned long *hmm_pfns)
 {
 	struct kfd_process *p;
@@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		}
 		adev = (struct amdgpu_device *)pdd->dev->kgd;
 
-		r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
+		r = svm_range_dma_map_dev(adev, prange, offset, npages,
+					  hmm_pfns, gpuidx);
 		if (r)
 			break;
 	}
@@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
 	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
 
 	pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
-
-	pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
-		 prange->svms, prange->start, prange->last,
-		 (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
-
 	return pte_flags;
 }
 
@@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 
 static int
 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		     struct svm_range *prange, dma_addr_t *dma_addr,
+		     struct svm_range *prange, unsigned long offset,
+		     unsigned long npages, bool readonly, dma_addr_t *dma_addr,
 		     struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
 	struct amdgpu_bo_va bo_va;
@@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	int r = 0;
 	int64_t i;
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-		 prange->last);
+	last_start = prange->start + offset;
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
+		 last_start, last_start + npages - 1, readonly);
 
 	if (prange->svm_bo && prange->ttm_res)
 		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
 
-	last_start = prange->start;
-	for (i = 0; i < prange->npages; i++) {
+	for (i = offset; i < offset + npages; i++) {
 		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
 		dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
 		if ((prange->start + i) < prange->last &&
@@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
 			 last_start, prange->start + i, last_domain ? "GPU" : "CPU");
+
 		pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
-		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
-						last_start,
+		if (readonly)
+			pte_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+		pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
+			 prange->svms, last_start, prange->start + i,
+			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
+			 pte_flags);
+
+		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
+						NULL, last_start,
 						prange->start + i, pte_flags,
 						last_start - prange->start,
-						NULL,
-						dma_addr,
+						NULL, dma_addr,
 						&vm->last_update,
 						&table_freed);
 		if (r) {
@@ -1220,8 +1229,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	return r;
 }
 
-static int svm_range_map_to_gpus(struct svm_range *prange,
-				 unsigned long *bitmap, bool wait)
+static int
+svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
+		      unsigned long npages, bool readonly,
+		      unsigned long *bitmap, bool wait)
 {
 	struct kfd_process_device *pdd;
 	struct amdgpu_device *bo_adev;
@@ -1257,7 +1268,8 @@ static int svm_range_map_to_gpus(struct svm_range *prange,
 		}
 
 		r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
-					 prange, prange->dma_addr[gpuidx],
+					 prange, offset, npages, readonly,
+					 prange->dma_addr[gpuidx],
 					 bo_adev, wait ? &fence : NULL);
 		if (r)
 			break;
@@ -1390,6 +1402,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 				      int32_t gpuidx, bool intr, bool wait)
 {
 	struct svm_validate_context ctx;
+	unsigned long start, end, addr;
 	struct hmm_range *hmm_range;
 	struct kfd_process *p;
 	void *owner;
@@ -1448,40 +1461,64 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			break;
 		}
 	}
-	r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
-				       prange->start << PAGE_SHIFT,
-				       prange->npages, &hmm_range,
-				       false, true, owner);
-	if (r) {
-		pr_debug("failed %d to get svm range pages\n", r);
-		goto unreserve_out;
-	}
 
-	r = svm_range_dma_map(prange, ctx.bitmap,
-			      hmm_range->hmm_pfns);
-	if (r) {
-		pr_debug("failed %d to dma map range\n", r);
-		goto unreserve_out;
-	}
+	start = prange->start << PAGE_SHIFT;
+	end = (prange->last + 1) << PAGE_SHIFT;
+	for (addr = start; addr < end && !r; ) {
+		struct vm_area_struct *vma;
+		unsigned long next;
+		unsigned long offset;
+		unsigned long npages;
+		bool readonly;
 
-	prange->validated_once = true;
+		vma = find_vma(mm, addr);
+		if (!vma || addr < vma->vm_start) {
+			r = -EINVAL;
+			goto unreserve_out;
+		}
+		readonly = !(vma->vm_flags & VM_WRITE);
 
-	svm_range_lock(prange);
-	if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
-		pr_debug("hmm update the range, need validate again\n");
-		r = -EAGAIN;
-		goto unlock_out;
-	}
-	if (!list_empty(&prange->child_list)) {
-		pr_debug("range split by unmap in parallel, validate again\n");
-		r = -EAGAIN;
-		goto unlock_out;
-	}
+		next = min(vma->vm_end, end);
+		npages = (next - addr) / PAGE_SIZE;
+		r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+					       addr, npages, &hmm_range,
+					       readonly, true, owner);
+		if (r) {
+			pr_debug("failed %d to get svm range pages\n", r);
+			goto unreserve_out;
+		}
 
-	r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
+		offset = (addr - start) / PAGE_SIZE;
+		r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
+				      hmm_range->hmm_pfns);
+		if (r) {
+			pr_debug("failed %d to dma map range\n", r);
+			goto unreserve_out;
+		}
+
+		svm_range_lock(prange);
+		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+			pr_debug("hmm update the range, need validate again\n");
+			r = -EAGAIN;
+			goto unlock_out;
+		}
+		if (!list_empty(&prange->child_list)) {
+			pr_debug("range split by unmap in parallel, validate again\n");
+			r = -EAGAIN;
+			goto unlock_out;
+		}
+
+		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+					  ctx.bitmap, wait);
 
 unlock_out:
-	svm_range_unlock(prange);
+		svm_range_unlock(prange);
+
+		addr = next;
+	}
+
+	prange->validated_once = true;
+
 unreserve_out:
 	svm_range_unreserve_bos(&ctx);
 
-- 
2.17.1



* Re: [PATCH 1/2] drm/amdkfd: check access permission to restore retry fault
  2021-08-19 14:56 [PATCH 1/2] drm/amdkfd: check access permission to restore retry fault Philip Yang
  2021-08-19 14:56 ` [PATCH 2/2] drm/amdkfd: map SVM range with correct access permission Philip Yang
@ 2021-08-19 21:12 ` Felix Kuehling
  1 sibling, 0 replies; 5+ messages in thread
From: Felix Kuehling @ 2021-08-19 21:12 UTC (permalink / raw)
  To: Philip Yang, amd-gfx

On 2021-08-19 10:56 a.m., Philip Yang wrote:
> Check range access permission when restoring a GPU retry fault: if the
> faulting address belongs to a VMA, but the VMA does not grant the read
> or write permission requested by the GPU, fail to restore the address.
> The VM fault event is then passed back to user space.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>

Just some nit-picks. Other than that the patch looks good to me. See
inline ...


> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  5 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c |  3 ++-
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  3 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 30 +++++++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.h   |  5 +++--
>  6 files changed, 40 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 831f00644460..ff6de40b860c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -3347,12 +3347,13 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>   * @adev: amdgpu device pointer
>   * @pasid: PASID of the VM
>   * @addr: Address of the fault
> + * @rw_fault: 0 is read fault, 1 is write fault
>   *
>   * Try to gracefully handle a VM fault. Return true if the fault was handled and
>   * shouldn't be reported any more.
>   */
>  bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
> -			    uint64_t addr)
> +			    uint64_t addr, uint32_t rw_fault)

Should rw_fault be a bool? And maybe call it write_fault to clarify what
"true" means.


>  {
>  	bool is_compute_context = false;
>  	struct amdgpu_bo *root;
> @@ -3377,7 +3378,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
>  	addr /= AMDGPU_GPU_PAGE_SIZE;
>  
>  	if (is_compute_context &&
> -	    !svm_range_restore_pages(adev, pasid, addr)) {
> +	    !svm_range_restore_pages(adev, pasid, addr, rw_fault)) {
>  	amdgpu_bo_unref(&root);
>  		return true;
>  	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 80cc9ab2c1d0..1cc574ece180 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -448,7 +448,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
>  void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
>  			     struct amdgpu_task_info *task_info);
>  bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
> -			    uint64_t addr);
> +			    uint64_t addr, uint32_t rw_fault);

bool write_fault


>  
>  void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 24b781e90bef..994983901006 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -93,6 +93,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
>  				       struct amdgpu_iv_entry *entry)
>  {
>  	bool retry_fault = !!(entry->src_data[1] & 0x80);
> +	bool rw_fault = !!(entry->src_data[1] & 0x20);

write_fault


>  	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src];
>  	struct amdgpu_task_info task_info;
>  	uint32_t status = 0;
> @@ -121,7 +122,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
>  		/* Try to handle the recoverable page faults by filling page
>  		 * tables
>  		 */
> -		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
> +		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, rw_fault))
>  			return 1;
>  	}
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 097230b5e946..9a37fd0527a9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -506,6 +506,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>  				      struct amdgpu_iv_entry *entry)
>  {
>  	bool retry_fault = !!(entry->src_data[1] & 0x80);
> +	bool rw_fault = !!(entry->src_data[1] & 0x20);

write_fault


>  	uint32_t status = 0, cid = 0, rw = 0;
>  	struct amdgpu_task_info task_info;
>  	struct amdgpu_vmhub *hub;
> @@ -536,7 +537,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>  		/* Try to handle the recoverable page faults by filling page
>  		 * tables
>  		 */
> -		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr))
> +		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, rw_fault))
>  			return 1;
>  	}
>  
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index d4a43c94bcf9..cf1009bb532a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -2400,9 +2400,29 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,
>  		WRITE_ONCE(pdd->faults, pdd->faults + 1);
>  }
>  
> +static bool
> +svm_range_allow_access(struct mm_struct *mm, uint64_t addr, uint32_t rw_fault)

I think the function name "svm_range_..." is a bit misleading because it
doesn't do anything for an address range. It only checks one VMA at one
virtual address. I'd suggest "svm_fault_allowed".
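
For example:

	static bool
	svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault);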


> +{
> +	unsigned long requested = VM_READ;
> +	struct vm_area_struct *vma;
> +
> +	if (rw_fault)
> +		requested |= VM_WRITE;
> +
> +	vma = find_vma(mm, addr << PAGE_SHIFT);
> +	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
> +		pr_debug("address 0x%llx VMA is removed\n", addr);
> +		return true;
> +	}
> +
> +	pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,
> +		vma->vm_flags);
> +	return (requested & ~vma->vm_flags) == 0;

I think this is the same as "(vma->vm_flags & requested) == requested".
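
Both forms test that every requested bit is set in vma->vm_flags:

	(requested & ~vma->vm_flags) == 0		/* no requested bit missing */
	(vma->vm_flags & requested) == requested	/* every requested bit set */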


> +}
> +
>  int
>  svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
> -			uint64_t addr)
> +			uint64_t addr, uint32_t rw_fault)

bool write_fault

Regards,
  Felix


>  {
>  	struct mm_struct *mm = NULL;
>  	struct svm_range_list *svms;
> @@ -2440,6 +2460,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  	}
>  
>  	mmap_read_lock(mm);
> +
>  retry_write_locked:
>  	mutex_lock(&svms->lock);
>  	prange = svm_range_from_addr(svms, addr, NULL);
> @@ -2484,6 +2505,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>  		goto out_unlock_range;
>  	}
>  
> +	if (!svm_range_allow_access(mm, addr, rw_fault)) {
> +		pr_debug("fault addr 0x%llx no %s permission\n", addr,
> +			rw_fault ? "write" : "read");
> +		r = -EPERM;
> +		goto out_unlock_range;
> +	}
> +
>  	best_loc = svm_range_best_restore_location(prange, adev, &gpuidx);
>  	if (best_loc == -1) {
>  		pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n",
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> index e7fc5e8998aa..e77d90de08a6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
> @@ -175,7 +175,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
>  			       unsigned long addr, struct svm_range *parent,
>  			       struct svm_range *prange);
>  int svm_range_restore_pages(struct amdgpu_device *adev,
> -			    unsigned int pasid, uint64_t addr);
> +			    unsigned int pasid, uint64_t addr, uint32_t rw);
>  int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
>  void svm_range_add_list_work(struct svm_range_list *svms,
>  			     struct svm_range *prange, struct mm_struct *mm,
> @@ -210,7 +210,8 @@ static inline void svm_range_list_fini(struct kfd_process *p)
>  }
>  
>  static inline int svm_range_restore_pages(struct amdgpu_device *adev,
> -					  unsigned int pasid, uint64_t addr)
> +					  unsigned int pasid, uint64_t addr,
> +					  uint32_t rw_fault)
>  {
>  	return -EFAULT;
>  }


* Re: [PATCH 2/2] drm/amdkfd: map SVM range with correct access permission
  2021-08-19 14:56 ` [PATCH 2/2] drm/amdkfd: map SVM range with correct access permission Philip Yang
@ 2021-08-19 21:32   ` Felix Kuehling
  2021-08-20 15:44     ` philip yang
  0 siblings, 1 reply; 5+ messages in thread
From: Felix Kuehling @ 2021-08-19 21:32 UTC (permalink / raw)
  To: amd-gfx, Yang, Philip

On 2021-08-19 10:56 a.m., Philip Yang wrote:
> When restoring a retry fault, prefetching a range, or restoring an svm
> range after eviction, map the range to the GPU with the correct read or
> write access permission.
>
> A range may include multiple VMAs. Update the GPU page table with the
> offset into prange and the number of pages for each VMA, according to
> that VMA's access permission.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>

Minor nitpicks, and one question. See inline. It looks good otherwise.


> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 131 +++++++++++++++++----------
>  1 file changed, 84 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index cf1009bb532a..94612581963f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range *prange)
>  
>  static int
>  svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
> +		      unsigned long offset, unsigned long npages,
>  		      unsigned long *hmm_pfns, uint32_t gpuidx)
>  {
>  	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
> @@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>  		prange->dma_addr[gpuidx] = addr;
>  	}
>  
> -	for (i = 0; i < prange->npages; i++) {
> +	addr += offset;
> +	for (i = 0; i < npages; i++) {
>  		if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
>  			      "leaking dma mapping\n"))
>  			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
> @@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
>  
>  static int
>  svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
> +		  unsigned long offset, unsigned long npages,
>  		  unsigned long *hmm_pfns)
>  {
>  	struct kfd_process *p;
> @@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
>  		}
>  		adev = (struct amdgpu_device *)pdd->dev->kgd;
>  
> -		r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
> +		r = svm_range_dma_map_dev(adev, prange, offset, npages,
> +					  hmm_pfns, gpuidx);
>  		if (r)
>  			break;
>  	}
> @@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
>  	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
>  
>  	pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
> -
> -	pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
> -		 prange->svms, prange->start, prange->last,
> -		 (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
> -
>  	return pte_flags;
>  }
>  
> @@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
>  
>  static int
>  svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -		     struct svm_range *prange, dma_addr_t *dma_addr,
> +		     struct svm_range *prange, unsigned long offset,
> +		     unsigned long npages, bool readonly, dma_addr_t *dma_addr,
>  		     struct amdgpu_device *bo_adev, struct dma_fence **fence)
>  {
>  	struct amdgpu_bo_va bo_va;
> @@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  	int r = 0;
>  	int64_t i;
>  
> -	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
> -		 prange->last);
> +	last_start = prange->start + offset;
> +
> +	pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
> +		 last_start, last_start + npages - 1, readonly);
>  
>  	if (prange->svm_bo && prange->ttm_res)
>  		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
>  
> -	last_start = prange->start;
> -	for (i = 0; i < prange->npages; i++) {
> +	for (i = offset; i < offset + npages; i++) {
>  		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
>  		dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
>  		if ((prange->start + i) < prange->last &&
> @@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  
>  		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
>  			 last_start, prange->start + i, last_domain ? "GPU" : "CPU");
> +
>  		pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
> -		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
> -						last_start,
> +		if (readonly)
> +			pte_flags &= ~AMDGPU_PTE_WRITEABLE;
> +
> +		pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
> +			 prange->svms, last_start, prange->start + i,
> +			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
> +			 pte_flags);
> +
> +		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
> +						NULL, last_start,
>  						prange->start + i, pte_flags,
>  						last_start - prange->start,
> -						NULL,
> -						dma_addr,
> +						NULL, dma_addr,
>  						&vm->last_update,
>  						&table_freed);
>  		if (r) {
> @@ -1220,8 +1229,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  	return r;
>  }
>  
> -static int svm_range_map_to_gpus(struct svm_range *prange,
> -				 unsigned long *bitmap, bool wait)
> +static int
> +svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
> +		      unsigned long npages, bool readonly,
> +		      unsigned long *bitmap, bool wait)
>  {
>  	struct kfd_process_device *pdd;
>  	struct amdgpu_device *bo_adev;
> @@ -1257,7 +1268,8 @@ static int svm_range_map_to_gpus(struct svm_range *prange,
>  		}
>  
>  		r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
> -					 prange, prange->dma_addr[gpuidx],
> +					 prange, offset, npages, readonly,
> +					 prange->dma_addr[gpuidx],
>  					 bo_adev, wait ? &fence : NULL);
>  		if (r)
>  			break;
> @@ -1390,6 +1402,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>  				      int32_t gpuidx, bool intr, bool wait)
>  {
>  	struct svm_validate_context ctx;
> +	unsigned long start, end, addr;
>  	struct hmm_range *hmm_range;
>  	struct kfd_process *p;
>  	void *owner;
> @@ -1448,40 +1461,64 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
>  			break;
>  		}
>  	}
> -	r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
> -				       prange->start << PAGE_SHIFT,
> -				       prange->npages, &hmm_range,
> -				       false, true, owner);
> -	if (r) {
> -		pr_debug("failed %d to get svm range pages\n", r);
> -		goto unreserve_out;
> -	}
>  
> -	r = svm_range_dma_map(prange, ctx.bitmap,
> -			      hmm_range->hmm_pfns);
> -	if (r) {
> -		pr_debug("failed %d to dma map range\n", r);
> -		goto unreserve_out;
> -	}
> +	start = prange->start << PAGE_SHIFT;
> +	end = (prange->last + 1) << PAGE_SHIFT;
> +	for (addr = start; addr < end && !r; ) {
> +		struct vm_area_struct *vma;
> +		unsigned long next;
> +		unsigned long offset;
> +		unsigned long npages;
> +		bool readonly;
>  
> -	prange->validated_once = true;
> +		vma = find_vma(mm, addr);
> +		if (!vma || addr < vma->vm_start) {
> +			r = -EINVAL;

I think -EFAULT would be the appropriate error code here.


> +			goto unreserve_out;
> +		}
> +		readonly = !(vma->vm_flags & VM_WRITE);
>  
> -	svm_range_lock(prange);
> -	if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> -		pr_debug("hmm update the range, need validate again\n");
> -		r = -EAGAIN;
> -		goto unlock_out;
> -	}
> -	if (!list_empty(&prange->child_list)) {
> -		pr_debug("range split by unmap in parallel, validate again\n");
> -		r = -EAGAIN;
> -		goto unlock_out;
> -	}
> +		next = min(vma->vm_end, end);
> +		npages = (next - addr) / PAGE_SIZE;

Use >> PAGE_SHIFT for consistency.
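
i.e.:

	npages = (next - addr) >> PAGE_SHIFT;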


> +		r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
> +					       addr, npages, &hmm_range,
> +					       readonly, true, owner);
> +		if (r) {
> +			pr_debug("failed %d to get svm range pages\n", r);
> +			goto unreserve_out;
> +		}
>  
> -	r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
> +		offset = (addr - start) / PAGE_SIZE;

Use >> PAGE_SHIFT here as well, for consistency.


> +		r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
> +				      hmm_range->hmm_pfns);
> +		if (r) {
> +			pr_debug("failed %d to dma map range\n", r);
> +			goto unreserve_out;
> +		}
> +
> +		svm_range_lock(prange);
> +		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
> +			pr_debug("hmm update the range, need validate again\n");
> +			r = -EAGAIN;
> +			goto unlock_out;
> +		}
> +		if (!list_empty(&prange->child_list)) {
> +			pr_debug("range split by unmap in parallel, validate again\n");
> +			r = -EAGAIN;
> +			goto unlock_out;
> +		}
> +
> +		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
> +					  ctx.bitmap, wait);
>  
>  unlock_out:
> -	svm_range_unlock(prange);
> +		svm_range_unlock(prange);
> +
> +		addr = next;
> +	}
> +
> +	prange->validated_once = true;

Should this be conditional on "!r"?
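
That is, something like:

	if (!r)
		prange->validated_once = true;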

Regards,
  Felix


> +
>  unreserve_out:
>  	svm_range_unreserve_bos(&ctx);
>  


* Re: [PATCH 2/2] drm/amdkfd: map SVM range with correct access permission
  2021-08-19 21:32   ` Felix Kuehling
@ 2021-08-20 15:44     ` philip yang
  0 siblings, 0 replies; 5+ messages in thread
From: philip yang @ 2021-08-20 15:44 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx, Yang, Philip

[-- Attachment #1: Type: text/html, Size: 11003 bytes --]
