All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/6] Userptr memory mapping support for KFD
@ 2018-03-23 19:32 Felix Kuehling
       [not found] ` <1521833553-31571-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Felix Kuehling @ 2018-03-23 19:32 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Felix Kuehling

Update of remaining patches from the GPUVM patch series. This should apply
on top of the fixes I just sent out.

Felix Kuehling (6):
  drm/amdgpu: Add MMU notifier type for KFD userptr
  drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads
  drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier
  drm/amdkfd: GFP_NOIO while holding locks taken in MMU notifier
  drm/amdkfd: Add quiesce_mm and resume_mm to kgd2kfd_calls
  drm/amdgpu: Add userptr support for KFD

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  12 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 572 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c           | 111 ++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h           |  11 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c          |  38 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c          |  40 +-
 drivers/gpu/drm/amd/amdkfd/kfd_module.c          |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h            |   4 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c         |  10 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h  |   6 +
 13 files changed, 746 insertions(+), 66 deletions(-)

-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 1/6] drm/amdgpu: Add MMU notifier type for KFD userptr
       [not found] ` <1521833553-31571-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2018-03-23 19:32   ` Felix Kuehling
  2018-03-23 19:32   ` [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads Felix Kuehling
                     ` (5 subsequent siblings)
  6 siblings, 0 replies; 16+ messages in thread
From: Felix Kuehling @ 2018-03-23 19:32 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Felix Kuehling

This commit adds the notion of MMU notifier types GFX and HSA. GFX
continues to work like MMU notifiers did before. HSA adds support for
KFD userptr BOs. The implementation of KFD userptr eviction is a stub
for now.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  7 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c           | 94 ++++++++++++++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h           | 11 ++-
 5 files changed, 97 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index c2c2bea..83e0c5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -104,6 +104,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
 int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
 				uint32_t vmid, uint64_t gpu_addr,
 				uint32_t *ib_cmd, uint32_t ib_len);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2f42c60..2d6f13a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1418,6 +1418,13 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
 	return ret;
 }
 
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
+				struct mm_struct *mm)
+{
+	/* TODO */
+	return 0;
+}
+
 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
  *   KFD process identified by process_info
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc34b50..8e66f37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -536,7 +536,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (p->bo_list) {
 		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 		if (p->bo_list->first_userptr != p->bo_list->num_entries)
-			p->mn = amdgpu_mn_get(p->adev);
+			p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
 	}
 
 	INIT_LIST_HEAD(&duplicates);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index bd67f4c..f2ed18e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -36,12 +36,14 @@
 #include <drm/drm.h>
 
 #include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
 
 struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
 	struct mmu_notifier	mn;
+	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
 	struct work_struct	work;
@@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 }
 
 /**
- * amdgpu_mn_invalidate_range_start - callback to notify about mm change
+ * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
  * @mn: our notifier
  * @mn: the mm this callback is about
@@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
  * We block for all BOs between start and end to be idle and
  * unmap them by move them into system domain again.
  */
-static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
-					     struct mm_struct *mm,
-					     unsigned long start,
-					     unsigned long end)
+static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+						 struct mm_struct *mm,
+						 unsigned long start,
+						 unsigned long end)
 {
 	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
 }
 
 /**
+ * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mm: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We temporarily evict all BOs between start and end. This
+ * necessitates evicting all user-mode queues of the process. The BOs
+ * are restored in amdgpu_mn_invalidate_range_end_hsa.
+ */
+static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+						 struct mm_struct *mm,
+						 unsigned long start,
+						 unsigned long end)
+{
+	struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+	struct interval_tree_node *it;
+
+	/* notification is exclusive, but interval is inclusive */
+	end -= 1;
+
+	amdgpu_mn_read_lock(rmn);
+
+	it = interval_tree_iter_first(&rmn->objects, start, end);
+	while (it) {
+		struct amdgpu_mn_node *node;
+		struct amdgpu_bo *bo;
+
+		node = container_of(it, struct amdgpu_mn_node, it);
+		it = interval_tree_iter_next(it, start, end);
+
+		list_for_each_entry(bo, &node->bos, mn_list) {
+			struct kgd_mem *mem = bo->kfd_bo;
+
+			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+							 start, end))
+				amdgpu_amdkfd_evict_userptr(mem, mm);
+		}
+	}
+}
+
+/**
  * amdgpu_mn_invalidate_range_end - callback to notify about mm change
  *
  * @mn: our notifier
@@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
 	amdgpu_mn_read_unlock(rmn);
 }
 
-static const struct mmu_notifier_ops amdgpu_mn_ops = {
-	.release = amdgpu_mn_release,
-	.invalidate_range_start = amdgpu_mn_invalidate_range_start,
-	.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+	[AMDGPU_MN_TYPE_GFX] = {
+		.release = amdgpu_mn_release,
+		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
+		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+	},
+	[AMDGPU_MN_TYPE_HSA] = {
+		.release = amdgpu_mn_release,
+		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
+		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+	},
 };
 
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
+ */
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
+
 /**
  * amdgpu_mn_get - create notifier context
  *
  * @adev: amdgpu device pointer
+ * @type: type of MMU notifier context
  *
  * Creates a notifier context for current->mm.
  */
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+				enum amdgpu_mn_type type)
 {
 	struct mm_struct *mm = current->mm;
 	struct amdgpu_mn *rmn;
+	unsigned long key = AMDGPU_MN_KEY(mm, type);
 	int r;
 
 	mutex_lock(&adev->mn_lock);
@@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 		return ERR_PTR(-EINTR);
 	}
 
-	hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
-		if (rmn->mm == mm)
+	hash_for_each_possible(adev->mn_hash, rmn, node, key)
+		if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
 			goto release_locks;
 
 	rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 
 	rmn->adev = adev;
 	rmn->mm = mm;
-	rmn->mn.ops = &amdgpu_mn_ops;
 	init_rwsem(&rmn->lock);
+	rmn->type = type;
+	rmn->mn.ops = &amdgpu_mn_ops[type];
 	rmn->objects = RB_ROOT_CACHED;
 	mutex_init(&rmn->read_lock);
 	atomic_set(&rmn->recursion, 0);
@@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
 	if (r)
 		goto free_rmn;
 
-	hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
+	hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
 
 release_locks:
 	up_write(&mm->mmap_sem);
@@ -315,12 +377,14 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
 	unsigned long end = addr + amdgpu_bo_size(bo) - 1;
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	enum amdgpu_mn_type type =
+		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 	struct amdgpu_mn *rmn;
 	struct amdgpu_mn_node *node = NULL;
 	struct list_head bos;
 	struct interval_tree_node *it;
 
-	rmn = amdgpu_mn_get(adev);
+	rmn = amdgpu_mn_get(adev, type);
 	if (IS_ERR(rmn))
 		return PTR_ERR(rmn);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index d0095a3..eb0f432 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -29,16 +29,23 @@
  */
 struct amdgpu_mn;
 
+enum amdgpu_mn_type {
+	AMDGPU_MN_TYPE_GFX,
+	AMDGPU_MN_TYPE_HSA,
+};
+
 #if defined(CONFIG_MMU_NOTIFIER)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
 #else
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+					      enum amdgpu_mn_type type)
 {
 	return NULL;
 }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads
       [not found] ` <1521833553-31571-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2018-03-23 19:32   ` [PATCH 1/6] drm/amdgpu: Add MMU notifier type for KFD userptr Felix Kuehling
@ 2018-03-23 19:32   ` Felix Kuehling
       [not found]     ` <1521833553-31571-3-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2018-03-23 19:32   ` [PATCH 3/6] drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier Felix Kuehling
                     ` (4 subsequent siblings)
  6 siblings, 1 reply; 16+ messages in thread
From: Felix Kuehling @ 2018-03-23 19:32 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Felix Kuehling

This commit allows amdgpu_ttm_tt_get_user_pages to work in a worker
thread rather than regular process context. This will be used when
KFD userptr BOs are restored after an MMU-notifier eviction.

v2: Manage task reference with get_task_struct/put_task_struct

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c2fae04..25490fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -689,7 +689,7 @@ struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
 	uint64_t		userptr;
-	struct mm_struct	*usermm;
+	struct task_struct	*usertask;
 	uint32_t		userflags;
 	spinlock_t              guptasklock;
 	struct list_head        guptasks;
@@ -700,14 +700,18 @@ struct amdgpu_ttm_tt {
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	struct mm_struct *mm = gtt->usertask->mm;
 	unsigned int flags = 0;
 	unsigned pinned = 0;
 	int r;
 
+	if (!mm) /* Happens during process shutdown */
+		return -ESRCH;
+
 	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
 		flags |= FOLL_WRITE;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 
 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
 		/* check that we only use anonymous memory
@@ -715,9 +719,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
 		struct vm_area_struct *vma;
 
-		vma = find_vma(gtt->usermm, gtt->userptr);
+		vma = find_vma(mm, gtt->userptr);
 		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&current->mm->mmap_sem);
+			up_read(&mm->mmap_sem);
 			return -EPERM;
 		}
 	}
@@ -733,7 +737,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 		list_add(&guptask.list, &gtt->guptasks);
 		spin_unlock(&gtt->guptasklock);
 
-		r = get_user_pages(userptr, num_pages, flags, p, NULL);
+		if (mm == current->mm)
+			r = get_user_pages(userptr, num_pages, flags, p, NULL);
+		else
+			r = get_user_pages_remote(gtt->usertask,
+					mm, userptr, num_pages,
+					flags, p, NULL, NULL);
 
 		spin_lock(&gtt->guptasklock);
 		list_del(&guptask.list);
@@ -746,12 +755,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 
 	} while (pinned < ttm->num_pages);
 
-	up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	return 0;
 
 release_pages:
 	release_pages(pages, pinned);
-	up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	return r;
 }
 
@@ -972,6 +981,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
+	if (gtt->usertask)
+		put_task_struct(gtt->usertask);
+
 	ttm_dma_tt_fini(&gtt->ttm);
 	kfree(gtt);
 }
@@ -1072,8 +1084,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 		return -EINVAL;
 
 	gtt->userptr = addr;
-	gtt->usermm = current->mm;
 	gtt->userflags = flags;
+
+	if (gtt->usertask)
+		put_task_struct(gtt->usertask);
+	gtt->usertask = current->group_leader;
+	get_task_struct(gtt->usertask);
+
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
 	atomic_set(&gtt->mmu_invalidations, 0);
@@ -1089,7 +1106,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
 	if (gtt == NULL)
 		return NULL;
 
-	return gtt->usermm;
+	if (gtt->usertask == NULL)
+		return NULL;
+
+	return gtt->usertask->mm;
 }
 
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 3/6] drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier
       [not found] ` <1521833553-31571-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2018-03-23 19:32   ` [PATCH 1/6] drm/amdgpu: Add MMU notifier type for KFD userptr Felix Kuehling
  2018-03-23 19:32   ` [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads Felix Kuehling
@ 2018-03-23 19:32   ` Felix Kuehling
       [not found]     ` <1521833553-31571-4-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2018-03-23 19:32   ` [PATCH 4/6] drm/amdkfd: GFP_NOIO " Felix Kuehling
                     ` (3 subsequent siblings)
  6 siblings, 1 reply; 16+ messages in thread
From: Felix Kuehling @ 2018-03-23 19:32 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Felix Kuehling

When an MMU notifier runs in memory reclaim context, it can deadlock
trying to take locks that are already held in the thread causing the
memory reclaim. The solution is to avoid memory reclaim while holding
locks that are taken in MMU notifiers.

This commit fixes kmalloc while holding rmn->lock by moving the call
outside the lock. The GFX MMU notifier also locks reservation objects.
I have no good solution for avoiding reclaim while holding reservation
objects. The HSA MMU notifier will not lock any reservation objects.

v2: Moved allocation outside lock instead of using GFP_NOIO

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index f2ed18e..83e344f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -380,7 +380,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 	enum amdgpu_mn_type type =
 		bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
 	struct amdgpu_mn *rmn;
-	struct amdgpu_mn_node *node = NULL;
+	struct amdgpu_mn_node *node = NULL, *new_node;
 	struct list_head bos;
 	struct interval_tree_node *it;
 
@@ -388,6 +388,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 	if (IS_ERR(rmn))
 		return PTR_ERR(rmn);
 
+	new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
+	if (!new_node)
+		return -ENOMEM;
+
 	INIT_LIST_HEAD(&bos);
 
 	down_write(&rmn->lock);
@@ -401,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 		list_splice(&node->bos, &bos);
 	}
 
-	if (!node) {
-		node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
-		if (!node) {
-			up_write(&rmn->lock);
-			return -ENOMEM;
-		}
-	}
+	if (!node)
+		node = new_node;
+	else
+		kfree(new_node);
 
 	bo->mn = rmn;
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 4/6] drm/amdkfd: GFP_NOIO while holding locks taken in MMU notifier
       [not found] ` <1521833553-31571-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
                     ` (2 preceding siblings ...)
  2018-03-23 19:32   ` [PATCH 3/6] drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier Felix Kuehling
@ 2018-03-23 19:32   ` Felix Kuehling
       [not found]     ` <1521833553-31571-5-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
  2018-03-23 19:32   ` [PATCH 5/6] drm/amdkfd: Add quiesce_mm and resume_mm to kgd2kfd_calls Felix Kuehling
                     ` (2 subsequent siblings)
  6 siblings, 1 reply; 16+ messages in thread
From: Felix Kuehling @ 2018-03-23 19:32 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Felix Kuehling

When an MMU notifier runs in memory reclaim context, it can deadlock
trying to take locks that are already held in the thread causing the
memory reclaim. The solution is to avoid memory reclaim while holding
locks that are taken in MMU notifiers by using GFP_NOIO.

This commit fixes memory allocations done while holding the dqm->lock
which is needed in the MMU notifier (dqm->ops.evict_process_queues).

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c          | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 334669996..0434f65 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -652,7 +652,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
 	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
 		return -ENOMEM;
 
-	*mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+	*mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
 	if ((*mem_obj) == NULL)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index c00c325..2bc49c6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -412,7 +412,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
 		return NULL;
 
-	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
 	if (!mqd)
 		return NULL;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 89e4242..481307b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
 		return NULL;
 
-	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+	mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
 	if (!mqd)
 		return NULL;
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 5/6] drm/amdkfd: Add quiesce_mm and resume_mm to kgd2kfd_calls
       [not found] ` <1521833553-31571-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
                     ` (3 preceding siblings ...)
  2018-03-23 19:32   ` [PATCH 4/6] drm/amdkfd: GFP_NOIO " Felix Kuehling
@ 2018-03-23 19:32   ` Felix Kuehling
  2018-03-23 19:32   ` [PATCH 6/6] drm/amdgpu: Add userptr support for KFD Felix Kuehling
  2018-05-11  8:27   ` [PATCH 0/6] Userptr memory mapping " Oded Gabbay
  6 siblings, 0 replies; 16+ messages in thread
From: Felix Kuehling @ 2018-03-23 19:32 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Felix Kuehling

These interfaces allow KGD to stop and resume all GPU user mode queue
access to a process address space. This is needed for handling MMU
notifiers of userptrs mapped for GPU access in KFD VMs.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c         | 38 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_module.c         |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h           |  4 +++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c        | 10 +++----
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |  6 ++++
 5 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0434f65..7b57995 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -541,6 +541,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 	spin_unlock(&kfd->interrupt_lock);
 }
 
+int kgd2kfd_quiesce_mm(struct mm_struct *mm)
+{
+	struct kfd_process *p;
+	int r;
+
+	/* Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function increments the process ref count.
+	 */
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return -ESRCH;
+
+	r = kfd_process_evict_queues(p);
+
+	kfd_unref_process(p);
+	return r;
+}
+
+int kgd2kfd_resume_mm(struct mm_struct *mm)
+{
+	struct kfd_process *p;
+	int r;
+
+	/* Because we are called from arbitrary context (workqueue) as opposed
+	 * to process context, kfd_process could attempt to exit while we are
+	 * running so the lookup function increments the process ref count.
+	 */
+	p = kfd_lookup_process_by_mm(mm);
+	if (!p)
+		return -ESRCH;
+
+	r = kfd_process_restore_queues(p);
+
+	kfd_unref_process(p);
+	return r;
+}
+
 /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
  *   prepare for safe eviction of KFD BOs that belong to the specified
  *   process.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 2237332..a8e92ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
 	.interrupt	= kgd2kfd_interrupt,
 	.suspend	= kgd2kfd_suspend,
 	.resume		= kgd2kfd_resume,
+	.quiesce_mm	= kgd2kfd_quiesce_mm,
+	.resume_mm	= kgd2kfd_resume_mm,
 	.schedule_evict_and_restore_process =
 			  kgd2kfd_schedule_evict_and_restore_process,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 96a9cc0..4d5c49e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -512,6 +512,8 @@ struct qcm_process_device {
 /* Approx. time before evicting the process again */
 #define PROCESS_ACTIVE_TIME_MS 10
 
+int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_resume_mm(struct mm_struct *mm);
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct dma_fence *fence);
 
@@ -681,6 +683,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *);
 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
 void kfd_unref_process(struct kfd_process *p);
+int kfd_process_evict_queues(struct kfd_process *p);
+int kfd_process_restore_queues(struct kfd_process *p);
 void kfd_suspend_all_processes(void);
 int kfd_resume_all_processes(void);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 75fdc18..ee43b2f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -808,7 +808,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
  * Eviction is reference-counted per process-device. This means multiple
  * evictions from different sources can be nested safely.
  */
-static int process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p)
 {
 	struct kfd_process_device *pdd;
 	int r = 0;
@@ -844,7 +844,7 @@ static int process_evict_queues(struct kfd_process *p)
 }
 
 /* process_restore_queues - Restore all user queues of a process */
-static  int process_restore_queues(struct kfd_process *p)
+int kfd_process_restore_queues(struct kfd_process *p)
 {
 	struct kfd_process_device *pdd;
 	int r, ret = 0;
@@ -886,7 +886,7 @@ static void evict_process_worker(struct work_struct *work)
 	flush_delayed_work(&p->restore_work);
 
 	pr_debug("Started evicting pasid %d\n", p->pasid);
-	ret = process_evict_queues(p);
+	ret = kfd_process_evict_queues(p);
 	if (!ret) {
 		dma_fence_signal(p->ef);
 		dma_fence_put(p->ef);
@@ -946,7 +946,7 @@ static void restore_process_worker(struct work_struct *work)
 		return;
 	}
 
-	ret = process_restore_queues(p);
+	ret = kfd_process_restore_queues(p);
 	if (!ret)
 		pr_debug("Finished restoring pasid %d\n", p->pasid);
 	else
@@ -963,7 +963,7 @@ void kfd_suspend_all_processes(void)
 		cancel_delayed_work_sync(&p->eviction_work);
 		cancel_delayed_work_sync(&p->restore_work);
 
-		if (process_evict_queues(p))
+		if (kfd_process_evict_queues(p))
 			pr_err("Failed to suspend process %d\n", p->pasid);
 		dma_fence_signal(p->ef);
 		dma_fence_put(p->ef);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 237289a..286cfe7 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -382,6 +382,10 @@ struct kfd2kgd_calls {
  *
  * @resume: Notifies amdkfd about a resume action done to a kgd device
  *
+ * @quiesce_mm: Quiesce all user queue access to specified MM address space
+ *
+ * @resume_mm: Resume user queue access to specified MM address space
+ *
  * @schedule_evict_and_restore_process: Schedules work queue that will prepare
  * for safe eviction of KFD BOs that belong to the specified process.
  *
@@ -399,6 +403,8 @@ struct kgd2kfd_calls {
 	void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
 	void (*suspend)(struct kfd_dev *kfd);
 	int (*resume)(struct kfd_dev *kfd);
+	int (*quiesce_mm)(struct mm_struct *mm);
+	int (*resume_mm)(struct mm_struct *mm);
 	int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
 			struct dma_fence *fence);
 };
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* [PATCH 6/6] drm/amdgpu: Add userptr support for KFD
       [not found] ` <1521833553-31571-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
                     ` (4 preceding siblings ...)
  2018-03-23 19:32   ` [PATCH 5/6] drm/amdkfd: Add quiesce_mm and resume_mm to kgd2kfd_calls Felix Kuehling
@ 2018-03-23 19:32   ` Felix Kuehling
  2018-05-11  8:27   ` [PATCH 0/6] Userptr memory mapping " Oded Gabbay
  6 siblings, 0 replies; 16+ messages in thread
From: Felix Kuehling @ 2018-03-23 19:32 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Felix Kuehling

This adds support for allocating, mapping, unmapping and freeing
userptr BOs, and for handling MMU notifiers.

v2: updated a comment

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  11 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 567 ++++++++++++++++++++++-
 2 files changed, 554 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 83e0c5c..c3024b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/mmu_context.h>
+#include <linux/workqueue.h>
 #include <kgd_kfd_interface.h>
 #include <drm/ttm/ttm_execbuf_util.h>
 #include "amdgpu_sync.h"
@@ -59,7 +60,9 @@ struct kgd_mem {
 
 	uint32_t mapping_flags;
 
+	atomic_t invalid;
 	struct amdkfd_process_info *process_info;
+	struct page **user_pages;
 
 	struct amdgpu_sync sync;
 
@@ -84,6 +87,9 @@ struct amdkfd_process_info {
 	struct list_head vm_list_head;
 	/* List head for all KFD BOs that belong to a KFD process. */
 	struct list_head kfd_bo_list;
+	/* List of userptr BOs that are valid or invalid */
+	struct list_head userptr_valid_list;
+	struct list_head userptr_inval_list;
 	/* Lock to protect kfd_bo_list */
 	struct mutex lock;
 
@@ -91,6 +97,11 @@ struct amdkfd_process_info {
 	unsigned int n_vms;
 	/* Eviction Fence */
 	struct amdgpu_amdkfd_fence *eviction_fence;
+
+	/* MMU-notifier related fields */
+	atomic_t evicted_bos;
+	struct delayed_work restore_userptr_work;
+	struct pid *pid;
 };
 
 int amdgpu_amdkfd_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 2d6f13a..150f1ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -23,6 +23,7 @@
 #define pr_fmt(fmt) "kfd2kgd: " fmt
 
 #include <linux/list.h>
+#include <linux/sched/mm.h>
 #include <drm/drmP.h>
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -33,10 +34,20 @@
  */
 #define VI_BO_SIZE_ALIGN (0x8000)
 
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
+
+/* Userptr restore delay, just long enough to allow consecutive VM
+ * changes to accumulate
+ */
+#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+
 /* Impose limit on how much memory KFD can use */
 static struct {
 	uint64_t max_system_mem_limit;
+	uint64_t max_userptr_mem_limit;
 	int64_t system_mem_used;
+	int64_t userptr_mem_used;
 	spinlock_t mem_limit_lock;
 } kfd_mem_limit;
 
@@ -57,6 +68,7 @@ static const char * const domain_bit_to_string[] = {
 
 #define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
 
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
 
 
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -78,6 +90,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
 
 /* Set memory usage limits. Current, limits are
  *  System (kernel) memory - 3/8th System RAM
+ *  Userptr memory - 3/4th System RAM
  */
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 {
@@ -90,8 +103,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 
 	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
 	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
-	pr_debug("Kernel memory limit %lluM\n",
-		(kfd_mem_limit.max_system_mem_limit >> 20));
+	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
+	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+		(kfd_mem_limit.max_system_mem_limit >> 20),
+		(kfd_mem_limit.max_userptr_mem_limit >> 20));
 }
 
 static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
@@ -111,6 +126,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
 			goto err_no_mem;
 		}
 		kfd_mem_limit.system_mem_used += (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		if ((kfd_mem_limit.system_mem_used + acc_size >
+			kfd_mem_limit.max_system_mem_limit) ||
+			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
+			kfd_mem_limit.max_userptr_mem_limit)) {
+			ret = -ENOMEM;
+			goto err_no_mem;
+		}
+		kfd_mem_limit.system_mem_used += acc_size;
+		kfd_mem_limit.userptr_mem_used += size;
 	}
 err_no_mem:
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +151,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
 				       sizeof(struct amdgpu_bo));
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
-	if (domain == AMDGPU_GEM_DOMAIN_GTT)
+	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -= (acc_size + size);
+	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+		kfd_mem_limit.system_mem_used -= acc_size;
+		kfd_mem_limit.userptr_mem_used -= size;
+	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+		  "kfd userptr memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -138,12 +169,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
 {
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 
-	if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
 		kfd_mem_limit.system_mem_used -=
 			(bo->tbo.acc_size + amdgpu_bo_size(bo));
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
+	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+		  "kfd userptr memory accounting unbalanced");
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
@@ -506,7 +542,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
 }
 
 static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
-				struct amdkfd_process_info *process_info)
+				struct amdkfd_process_info *process_info,
+				bool userptr)
 {
 	struct ttm_validate_buffer *entry = &mem->validate_list;
 	struct amdgpu_bo *bo = mem->bo;
@@ -515,8 +552,93 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
 	entry->shared = true;
 	entry->bo = &bo->tbo;
 	mutex_lock(&process_info->lock);
-	list_add_tail(&entry->head, &process_info->kfd_bo_list);
+	if (userptr)
+		list_add_tail(&entry->head, &process_info->userptr_valid_list);
+	else
+		list_add_tail(&entry->head, &process_info->kfd_bo_list);
+	mutex_unlock(&process_info->lock);
+}
+
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
+ *
+ * The BO must already be on the userptr_valid_list. Otherwise an
+ * eviction and restore may happen that leaves the new BO unmapped
+ * with the user mode queues running.
+ *
+ * Takes the process_info->lock to protect against concurrent restore
+ * workers.
+ *
+ * Returns 0 for success, negative errno for errors.
+ */
+static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
+			   uint64_t user_addr)
+{
+	struct amdkfd_process_info *process_info = mem->process_info;
+	struct amdgpu_bo *bo = mem->bo;
+	struct ttm_operation_ctx ctx = { true, false };
+	int ret = 0;
+
+	mutex_lock(&process_info->lock);
+
+	ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
+	if (ret) {
+		pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
+		goto out;
+	}
+
+	ret = amdgpu_mn_register(bo, user_addr);
+	if (ret) {
+		pr_err("%s: Failed to register MMU notifier: %d\n",
+		       __func__, ret);
+		goto out;
+	}
+
+	/* If no restore worker is running concurrently, user_pages
+	 * should not be allocated
+	 */
+	WARN(mem->user_pages, "Leaking user_pages array");
+
+	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+					   sizeof(struct page *),
+					   GFP_KERNEL | __GFP_ZERO);
+	if (!mem->user_pages) {
+		pr_err("%s: Failed to allocate pages array\n", __func__);
+		ret = -ENOMEM;
+		goto unregister_out;
+	}
+
+	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+	if (ret) {
+		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+		goto free_out;
+	}
+
+	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
+
+	ret = amdgpu_bo_reserve(bo, true);
+	if (ret) {
+		pr_err("%s: Failed to reserve BO\n", __func__);
+		goto release_out;
+	}
+	amdgpu_ttm_placement_from_domain(bo, mem->domain);
+	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+	if (ret)
+		pr_err("%s: failed to validate BO\n", __func__);
+	amdgpu_bo_unreserve(bo);
+
+release_out:
+	if (ret)
+		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+free_out:
+	kvfree(mem->user_pages);
+	mem->user_pages = NULL;
+unregister_out:
+	if (ret)
+		amdgpu_mn_unregister(bo);
+out:
 	mutex_unlock(&process_info->lock);
+	return ret;
 }
 
 /* Reserving a BO and its page table BOs must happen atomically to
@@ -748,7 +870,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
 }
 
 static int map_bo_to_gpuvm(struct amdgpu_device *adev,
-		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+		struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
+		bool no_update_pte)
 {
 	int ret;
 
@@ -762,6 +885,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
 		return ret;
 	}
 
+	if (no_update_pte)
+		return 0;
+
 	ret = update_gpuvm_pte(adev, entry, sync);
 	if (ret) {
 		pr_err("update_gpuvm_pte() failed\n");
@@ -820,6 +946,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 		mutex_init(&info->lock);
 		INIT_LIST_HEAD(&info->vm_list_head);
 		INIT_LIST_HEAD(&info->kfd_bo_list);
+		INIT_LIST_HEAD(&info->userptr_valid_list);
+		INIT_LIST_HEAD(&info->userptr_inval_list);
 
 		info->eviction_fence =
 			amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -830,6 +958,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 			goto create_evict_fence_fail;
 		}
 
+		info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+		atomic_set(&info->evicted_bos, 0);
+		INIT_DELAYED_WORK(&info->restore_userptr_work,
+				  amdgpu_amdkfd_restore_userptr_worker);
+
 		*process_info = info;
 		*ef = dma_fence_get(&info->eviction_fence->base);
 	}
@@ -872,6 +1005,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 		dma_fence_put(*ef);
 		*ef = NULL;
 		*process_info = NULL;
+		put_pid(info->pid);
 create_evict_fence_fail:
 		mutex_destroy(&info->lock);
 		kfree(info);
@@ -967,8 +1101,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 	/* Release per-process resources when last compute VM is destroyed */
 	if (!process_info->n_vms) {
 		WARN_ON(!list_empty(&process_info->kfd_bo_list));
+		WARN_ON(!list_empty(&process_info->userptr_valid_list));
+		WARN_ON(!list_empty(&process_info->userptr_inval_list));
 
 		dma_fence_put(&process_info->eviction_fence->base);
+		cancel_delayed_work_sync(&process_info->restore_userptr_work);
+		put_pid(process_info->pid);
 		mutex_destroy(&process_info->lock);
 		kfree(process_info);
 	}
@@ -1003,9 +1141,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	uint64_t user_addr = 0;
 	struct amdgpu_bo *bo;
 	int byte_align;
-	u32 alloc_domain;
+	u32 domain, alloc_domain;
 	u64 alloc_flags;
 	uint32_t mapping_flags;
 	int ret;
@@ -1014,14 +1153,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	 * Check on which domain to allocate BO
 	 */
 	if (flags & ALLOC_MEM_FLAGS_VRAM) {
-		alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+		domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
 		alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
 		alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
 			AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
 			AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
 	} else if (flags & ALLOC_MEM_FLAGS_GTT) {
-		alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+		domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
 		alloc_flags = 0;
+	} else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+		domain = AMDGPU_GEM_DOMAIN_GTT;
+		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+		alloc_flags = 0;
+		if (!offset || !*offset)
+			return -EINVAL;
+		user_addr = *offset;
 	} else {
 		return -EINVAL;
 	}
@@ -1078,18 +1224,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	}
 	bo->kfd_bo = *mem;
 	(*mem)->bo = bo;
+	if (user_addr)
+		bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
 
 	(*mem)->va = va;
-	(*mem)->domain = alloc_domain;
+	(*mem)->domain = domain;
 	(*mem)->mapped_to_gpu_memory = 0;
 	(*mem)->process_info = avm->process_info;
-	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
+	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+
+	if (user_addr) {
+		ret = init_user_pages(*mem, current->mm, user_addr);
+		if (ret) {
+			mutex_lock(&avm->process_info->lock);
+			list_del(&(*mem)->validate_list.head);
+			mutex_unlock(&avm->process_info->lock);
+			goto allocate_init_user_pages_failed;
+		}
+	}
 
 	if (offset)
 		*offset = amdgpu_bo_mmap_offset(bo);
 
 	return 0;
 
+allocate_init_user_pages_failed:
+	amdgpu_bo_unref(&bo);
+	/* Don't unreserve system mem limit twice */
+	goto err_reserve_system_mem;
 err_bo_create:
 	unreserve_system_mem_limit(adev, size, alloc_domain);
 err_reserve_system_mem:
@@ -1122,12 +1284,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	 * be freed anyway
 	 */
 
+	/* No more MMU notifiers */
+	amdgpu_mn_unregister(mem->bo);
+
 	/* Make sure restore workers don't access the BO any more */
 	bo_list_entry = &mem->validate_list;
 	mutex_lock(&process_info->lock);
 	list_del(&bo_list_entry->head);
 	mutex_unlock(&process_info->lock);
 
+	/* Free user pages if necessary */
+	if (mem->user_pages) {
+		pr_debug("%s: Freeing user_pages array\n", __func__);
+		if (mem->user_pages[0])
+			release_pages(mem->user_pages,
+					mem->bo->tbo.ttm->num_pages);
+		kvfree(mem->user_pages);
+	}
+
 	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
 	if (unlikely(ret))
 		return ret;
@@ -1173,21 +1347,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	struct kfd_bo_va_list *bo_va_entry = NULL;
 	struct kfd_bo_va_list *bo_va_entry_aql = NULL;
 	unsigned long bo_size;
-
-	/* Make sure restore is not running concurrently.
-	 */
-	mutex_lock(&mem->process_info->lock);
-
-	mutex_lock(&mem->lock);
+	bool is_invalid_userptr = false;
 
 	bo = mem->bo;
-
 	if (!bo) {
 		pr_err("Invalid BO when mapping memory to GPU\n");
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
+	}
+
+	/* Make sure restore is not running concurrently. Since we
+	 * don't map invalid userptr BOs, we rely on the next restore
+	 * worker to do the mapping
+	 */
+	mutex_lock(&mem->process_info->lock);
+
+	/* Lock mmap-sem. If we find an invalid userptr BO, we can be
+	 * sure that the MMU notifier is no longer running
+	 * concurrently and the queues are actually stopped
+	 */
+	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+		down_write(&current->mm->mmap_sem);
+		is_invalid_userptr = atomic_read(&mem->invalid);
+		up_write(&current->mm->mmap_sem);
 	}
 
+	mutex_lock(&mem->lock);
+
 	domain = mem->domain;
 	bo_size = bo->tbo.mem.size;
 
@@ -1200,6 +1385,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	if (unlikely(ret))
 		goto out;
 
+	/* Userptr can be marked as "not invalid", but not actually be
+	 * validated yet (still in the system domain). In that case
+	 * the queues are still stopped and we can leave mapping for
+	 * the next restore worker
+	 */
+	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+		is_invalid_userptr = true;
+
 	if (check_if_add_bo_to_vm(avm, mem)) {
 		ret = add_bo_to_vm(adev, mem, avm, false,
 				&bo_va_entry);
@@ -1217,7 +1410,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 			goto add_bo_to_vm_failed;
 	}
 
-	if (mem->mapped_to_gpu_memory == 0) {
+	if (mem->mapped_to_gpu_memory == 0 &&
+	    !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
 		/* Validate BO only once. The eviction fence gets added to BO
 		 * the first time it is mapped. Validate will wait for all
 		 * background evictions to complete.
@@ -1235,7 +1429,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 					entry->va, entry->va + bo_size,
 					entry);
 
-			ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+			ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+					      is_invalid_userptr);
 			if (ret) {
 				pr_err("Failed to map radeon bo to gpuvm\n");
 				goto map_bo_to_gpuvm_failed;
@@ -1418,13 +1613,337 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
 	return ret;
 }
 
+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory. Therefore this is as
+ * simple as possible, using atomic counters.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
 int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
 				struct mm_struct *mm)
 {
-	/* TODO */
+	struct amdkfd_process_info *process_info = mem->process_info;
+	int invalid, evicted_bos;
+	int r = 0;
+
+	invalid = atomic_inc_return(&mem->invalid);
+	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
+	if (evicted_bos == 1) {
+		/* First eviction, stop the queues */
+		r = kgd2kfd->quiesce_mm(mm);
+		if (r)
+			pr_err("Failed to quiesce KFD\n");
+		schedule_delayed_work(&process_info->restore_userptr_work,
+			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+	}
+
+	return r;
+}
+
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
+ */
+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
+				     struct mm_struct *mm)
+{
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+	struct ttm_operation_ctx ctx = { false, false };
+	int invalid, ret;
+
+	/* Move all invalidated BOs to the userptr_inval_list and
+	 * release their user pages by migration to the CPU domain
+	 */
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_valid_list,
+				 validate_list.head) {
+		if (!atomic_read(&mem->invalid))
+			continue; /* BO is still valid */
+
+		bo = mem->bo;
+
+		if (amdgpu_bo_reserve(bo, true))
+			return -EAGAIN;
+		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+		amdgpu_bo_unreserve(bo);
+		if (ret) {
+			pr_err("%s: Failed to invalidate userptr BO\n",
+			       __func__);
+			return -EAGAIN;
+		}
+
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_inval_list);
+	}
+
+	if (list_empty(&process_info->userptr_inval_list))
+		return 0; /* All evicted userptr BOs were freed */
+
+	/* Go through userptr_inval_list and update any invalid user_pages */
+	list_for_each_entry(mem, &process_info->userptr_inval_list,
+			    validate_list.head) {
+		invalid = atomic_read(&mem->invalid);
+		if (!invalid)
+			/* BO hasn't been invalidated since the last
+			 * revalidation attempt. Keep its BO list.
+			 */
+			continue;
+
+		bo = mem->bo;
+
+		if (!mem->user_pages) {
+			mem->user_pages =
+				kvmalloc_array(bo->tbo.ttm->num_pages,
+						 sizeof(struct page *),
+						 GFP_KERNEL | __GFP_ZERO);
+			if (!mem->user_pages) {
+				pr_err("%s: Failed to allocate pages array\n",
+				       __func__);
+				return -ENOMEM;
+			}
+		} else if (mem->user_pages[0]) {
+			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+		}
+
+		/* Get updated user pages */
+		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+						   mem->user_pages);
+		if (ret) {
+			mem->user_pages[0] = NULL;
+			pr_info("%s: Failed to get user pages: %d\n",
+				__func__, ret);
+			/* Pretend it succeeded. It will fail later
+			 * with a VM fault if the GPU tries to access
+			 * it. Better than hanging indefinitely with
+			 * stalled user mode queues.
+			 */
+		}
+
+		/* Mark the BO as valid unless it was invalidated
+		 * again concurrently
+		 */
+		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
+			return -EAGAIN;
+	}
+
 	return 0;
 }
 
+/* Validate invalid userptr BOs
+ *
+ * Validates BOs on the userptr_inval_list, and moves them back to the
+ * userptr_valid_list. Also updates GPUVM page tables with new page
+ * addresses and waits for the page table updates to complete.
+ */
+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+	struct amdgpu_bo_list_entry *pd_bo_list_entries;
+	struct list_head resv_list, duplicates;
+	struct ww_acquire_ctx ticket;
+	struct amdgpu_sync sync;
+
+	struct amdgpu_vm *peer_vm;
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+	struct ttm_operation_ctx ctx = { false, false };
+	int i, ret;
+
+	pd_bo_list_entries = kcalloc(process_info->n_vms,
+				     sizeof(struct amdgpu_bo_list_entry),
+				     GFP_KERNEL);
+	if (!pd_bo_list_entries) {
+		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&resv_list);
+	INIT_LIST_HEAD(&duplicates);
+
+	/* Get all the page directory BOs that need to be reserved */
+	i = 0;
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
+				    &pd_bo_list_entries[i++]);
+	/* Add the userptr_inval_list entries to resv_list */
+	list_for_each_entry(mem, &process_info->userptr_inval_list,
+			    validate_list.head) {
+		list_add_tail(&mem->resv_list.head, &resv_list);
+		mem->resv_list.bo = mem->validate_list.bo;
+		mem->resv_list.shared = mem->validate_list.shared;
+	}
+
+	/* Reserve all BOs and page tables for validation */
+	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+	WARN(!list_empty(&duplicates), "Duplicates should be empty");
+	if (ret)
+		goto out;
+
+	amdgpu_sync_create(&sync);
+
+	/* Avoid triggering eviction fences when unmapping invalid
+	 * userptr BOs (waits for all fences, doesn't use
+	 * FENCE_OWNER_VM)
+	 */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
+						process_info->eviction_fence,
+						NULL, NULL);
+
+	ret = process_validate_vms(process_info);
+	if (ret)
+		goto unreserve_out;
+
+	/* Validate BOs and update GPUVM page tables */
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_inval_list,
+				 validate_list.head) {
+		struct kfd_bo_va_list *bo_va_entry;
+
+		bo = mem->bo;
+
+		/* Copy pages array and validate the BO if we got user pages */
+		if (mem->user_pages[0]) {
+			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
+						     mem->user_pages);
+			amdgpu_ttm_placement_from_domain(bo, mem->domain);
+			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+			if (ret) {
+				pr_err("%s: failed to validate BO\n", __func__);
+				goto unreserve_out;
+			}
+		}
+
+		/* Validate succeeded, now the BO owns the pages, free
+		 * our copy of the pointer array. Put this BO back on
+		 * the userptr_valid_list. If we need to revalidate
+		 * it, we need to start from scratch.
+		 */
+		kvfree(mem->user_pages);
+		mem->user_pages = NULL;
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_valid_list);
+
+		/* Update mapping. If the BO was not validated
+		 * (because we couldn't get user pages), this will
+		 * clear the page table entries, which will result in
+		 * VM faults if the GPU tries to access the invalid
+		 * memory.
+		 */
+		list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
+			if (!bo_va_entry->is_mapped)
+				continue;
+
+			ret = update_gpuvm_pte((struct amdgpu_device *)
+					       bo_va_entry->kgd_dev,
+					       bo_va_entry, &sync);
+			if (ret) {
+				pr_err("%s: update PTE failed\n", __func__);
+				/* make sure this gets validated again */
+				atomic_inc(&mem->invalid);
+				goto unreserve_out;
+			}
+		}
+	}
+
+	/* Update page directories */
+	ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			    vm_list_node)
+		amdgpu_bo_fence(peer_vm->root.base.bo,
+				&process_info->eviction_fence->base, true);
+	ttm_eu_backoff_reservation(&ticket, &resv_list);
+	amdgpu_sync_wait(&sync, false);
+	amdgpu_sync_free(&sync);
+out:
+	kfree(pd_bo_list_entries);
+
+	return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct amdkfd_process_info *process_info =
+		container_of(dwork, struct amdkfd_process_info,
+			     restore_userptr_work);
+	struct task_struct *usertask;
+	struct mm_struct *mm;
+	int evicted_bos;
+
+	evicted_bos = atomic_read(&process_info->evicted_bos);
+	if (!evicted_bos)
+		return;
+
+	/* Reference task and mm in case of concurrent process termination */
+	usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+	if (!usertask)
+		return;
+	mm = get_task_mm(usertask);
+	if (!mm) {
+		put_task_struct(usertask);
+		return;
+	}
+
+	mutex_lock(&process_info->lock);
+
+	if (update_invalid_user_pages(process_info, mm))
+		goto unlock_out;
+	/* userptr_inval_list can be empty if all evicted userptr BOs
+	 * have been freed. In that case there is nothing to validate
+	 * and we can just restart the queues.
+	 */
+	if (!list_empty(&process_info->userptr_inval_list)) {
+		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
+			goto unlock_out; /* Concurrent eviction, try again */
+
+		if (validate_invalid_user_pages(process_info))
+			goto unlock_out;
+	}
+	/* Final check for concurrent eviction and atomic update. If
+	 * another eviction happens after successful update, it will
+	 * be a first eviction that calls quiesce_mm. The eviction
+	 * reference counting inside KFD will handle this case.
+	 */
+	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
+	    evicted_bos)
+		goto unlock_out;
+	evicted_bos = 0;
+	if (kgd2kfd->resume_mm(mm)) {
+		pr_err("%s: Failed to resume KFD\n", __func__);
+		/* No recovery from this failure. Probably the CP is
+		 * hanging. No point trying again.
+		 */
+	}
+unlock_out:
+	mutex_unlock(&process_info->lock);
+	mmput(mm);
+	put_task_struct(usertask);
+
+	/* If validation failed, reschedule another attempt */
+	if (evicted_bos)
+		schedule_delayed_work(&process_info->restore_userptr_work,
+			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+}
+
 /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
  *   KFD process identified by process_info
  *
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads
       [not found]     ` <1521833553-31571-3-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2018-05-11  7:36       ` Oded Gabbay
       [not found]         ` <CAFCwf114qcOrKVOrt0JvxShvXdA8tdAKDCSHDK0DAJjM5rAQjg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Oded Gabbay @ 2018-05-11  7:36 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: amd-gfx list

Hi Felix,
The patch looks fine to me and I can ACK it, but I would prefer that
Alex or Christian will review it as well before pushing it.
Thanks,
Oded

On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
> This commit allows amdgpu_ttm_tt_get_user_pages to work in a worker
> thread rather than regular process context. This will be used when
> KFD userptr BOs are restored after an MMU-notifier eviction.
>
> v2: Manage task reference with get_task_struct/put_task_struct
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++++++++++++++++++++++++--------
>  1 file changed, 29 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c2fae04..25490fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -689,7 +689,7 @@ struct amdgpu_ttm_tt {
>         struct ttm_dma_tt       ttm;
>         u64                     offset;
>         uint64_t                userptr;
> -       struct mm_struct        *usermm;
> +       struct task_struct      *usertask;
>         uint32_t                userflags;
>         spinlock_t              guptasklock;
>         struct list_head        guptasks;
> @@ -700,14 +700,18 @@ struct amdgpu_ttm_tt {
>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>  {
>         struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +       struct mm_struct *mm = gtt->usertask->mm;
>         unsigned int flags = 0;
>         unsigned pinned = 0;
>         int r;
>
> +       if (!mm) /* Happens during process shutdown */
> +               return -ESRCH;
> +
>         if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
>                 flags |= FOLL_WRITE;
>
> -       down_read(&current->mm->mmap_sem);
> +       down_read(&mm->mmap_sem);
>
>         if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>                 /* check that we only use anonymous memory
> @@ -715,9 +719,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>                 unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>                 struct vm_area_struct *vma;
>
> -               vma = find_vma(gtt->usermm, gtt->userptr);
> +               vma = find_vma(mm, gtt->userptr);
>                 if (!vma || vma->vm_file || vma->vm_end < end) {
> -                       up_read(&current->mm->mmap_sem);
> +                       up_read(&mm->mmap_sem);
>                         return -EPERM;
>                 }
>         }
> @@ -733,7 +737,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>                 list_add(&guptask.list, &gtt->guptasks);
>                 spin_unlock(&gtt->guptasklock);
>
> -               r = get_user_pages(userptr, num_pages, flags, p, NULL);
> +               if (mm == current->mm)
> +                       r = get_user_pages(userptr, num_pages, flags, p, NULL);
> +               else
> +                       r = get_user_pages_remote(gtt->usertask,
> +                                       mm, userptr, num_pages,
> +                                       flags, p, NULL, NULL);
>
>                 spin_lock(&gtt->guptasklock);
>                 list_del(&guptask.list);
> @@ -746,12 +755,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>
>         } while (pinned < ttm->num_pages);
>
> -       up_read(&current->mm->mmap_sem);
> +       up_read(&mm->mmap_sem);
>         return 0;
>
>  release_pages:
>         release_pages(pages, pinned);
> -       up_read(&current->mm->mmap_sem);
> +       up_read(&mm->mmap_sem);
>         return r;
>  }
>
> @@ -972,6 +981,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
>  {
>         struct amdgpu_ttm_tt *gtt = (void *)ttm;
>
> +       if (gtt->usertask)
> +               put_task_struct(gtt->usertask);
> +
>         ttm_dma_tt_fini(&gtt->ttm);
>         kfree(gtt);
>  }
> @@ -1072,8 +1084,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>                 return -EINVAL;
>
>         gtt->userptr = addr;
> -       gtt->usermm = current->mm;
>         gtt->userflags = flags;
> +
> +       if (gtt->usertask)
> +               put_task_struct(gtt->usertask);
> +       gtt->usertask = current->group_leader;
> +       get_task_struct(gtt->usertask);
> +
>         spin_lock_init(&gtt->guptasklock);
>         INIT_LIST_HEAD(&gtt->guptasks);
>         atomic_set(&gtt->mmu_invalidations, 0);
> @@ -1089,7 +1106,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
>         if (gtt == NULL)
>                 return NULL;
>
> -       return gtt->usermm;
> +       if (gtt->usertask == NULL)
> +               return NULL;
> +
> +       return gtt->usertask->mm;
>  }
>
>  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
> --
> 2.7.4
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier
       [not found]     ` <1521833553-31571-4-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2018-05-11  7:38       ` Oded Gabbay
       [not found]         ` <CAFCwf13ihv88TGtSWXovmOawkcboMRuVOxih-R_-CnJC5h1=Qw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Oded Gabbay @ 2018-05-11  7:38 UTC (permalink / raw)
  To: Felix Kuehling, Alex Deucher, Christian König; +Cc: amd-gfx list

Hi Felix,
Same as patch 2. ACK but I want an extra review from amdgpu people.
Thanks,
Oded

On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
> When an MMU notifier runs in memory reclaim context, it can deadlock
> trying to take locks that are already held in the thread causing the
> memory reclaim. The solution is to avoid memory reclaim while holding
> locks that are taken in MMU notifiers.
>
> This commit fixes kmalloc while holding rmn->lock by moving the call
> outside the lock. The GFX MMU notifier also locks reservation objects.
> I have no good solution for avoiding reclaim while holding reservation
> objects. The HSA MMU notifier will not lock any reservation objects.
>
> v2: Moved allocation outside lock instead of using GFP_NOIO
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 17 +++++++++--------
>  1 file changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index f2ed18e..83e344f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -380,7 +380,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>         enum amdgpu_mn_type type =
>                 bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
>         struct amdgpu_mn *rmn;
> -       struct amdgpu_mn_node *node = NULL;
> +       struct amdgpu_mn_node *node = NULL, *new_node;
>         struct list_head bos;
>         struct interval_tree_node *it;
>
> @@ -388,6 +388,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>         if (IS_ERR(rmn))
>                 return PTR_ERR(rmn);
>
> +       new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
> +       if (!new_node)
> +               return -ENOMEM;
> +
>         INIT_LIST_HEAD(&bos);
>
>         down_write(&rmn->lock);
> @@ -401,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>                 list_splice(&node->bos, &bos);
>         }
>
> -       if (!node) {
> -               node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
> -               if (!node) {
> -                       up_write(&rmn->lock);
> -                       return -ENOMEM;
> -               }
> -       }
> +       if (!node)
> +               node = new_node;
> +       else
> +               kfree(new_node);
>
>         bo->mn = rmn;
>
> --
> 2.7.4
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads
       [not found]         ` <CAFCwf114qcOrKVOrt0JvxShvXdA8tdAKDCSHDK0DAJjM5rAQjg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2018-05-11  7:39           ` Oded Gabbay
  2018-05-11 18:11           ` Alex Deucher
  1 sibling, 0 replies; 16+ messages in thread
From: Oded Gabbay @ 2018-05-11  7:39 UTC (permalink / raw)
  To: Felix Kuehling, Alex Deucher, Christian König; +Cc: amd-gfx list

Adding Alex, Christian

On Fri, May 11, 2018 at 10:36 AM, Oded Gabbay <oded.gabbay@gmail.com> wrote:
> Hi Felix,
> The patch looks fine to me and I can ACK it, but I would prefer that
> Alex or Christian will review it as well before pushing it.
> Thanks,
> Oded
>
> On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
>> This commit allows amdgpu_ttm_tt_get_user_pages to work in a worker
>> thread rather than regular process context. This will be used when
>> KFD userptr BOs are restored after an MMU-notifier eviction.
>>
>> v2: Manage task reference with get_task_struct/put_task_struct
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++++++++++++++++++++++++--------
>>  1 file changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c2fae04..25490fe 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -689,7 +689,7 @@ struct amdgpu_ttm_tt {
>>         struct ttm_dma_tt       ttm;
>>         u64                     offset;
>>         uint64_t                userptr;
>> -       struct mm_struct        *usermm;
>> +       struct task_struct      *usertask;
>>         uint32_t                userflags;
>>         spinlock_t              guptasklock;
>>         struct list_head        guptasks;
>> @@ -700,14 +700,18 @@ struct amdgpu_ttm_tt {
>>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>  {
>>         struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +       struct mm_struct *mm = gtt->usertask->mm;
>>         unsigned int flags = 0;
>>         unsigned pinned = 0;
>>         int r;
>>
>> +       if (!mm) /* Happens during process shutdown */
>> +               return -ESRCH;
>> +
>>         if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
>>                 flags |= FOLL_WRITE;
>>
>> -       down_read(&current->mm->mmap_sem);
>> +       down_read(&mm->mmap_sem);
>>
>>         if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>>                 /* check that we only use anonymous memory
>> @@ -715,9 +719,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>                 unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>>                 struct vm_area_struct *vma;
>>
>> -               vma = find_vma(gtt->usermm, gtt->userptr);
>> +               vma = find_vma(mm, gtt->userptr);
>>                 if (!vma || vma->vm_file || vma->vm_end < end) {
>> -                       up_read(&current->mm->mmap_sem);
>> +                       up_read(&mm->mmap_sem);
>>                         return -EPERM;
>>                 }
>>         }
>> @@ -733,7 +737,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>                 list_add(&guptask.list, &gtt->guptasks);
>>                 spin_unlock(&gtt->guptasklock);
>>
>> -               r = get_user_pages(userptr, num_pages, flags, p, NULL);
>> +               if (mm == current->mm)
>> +                       r = get_user_pages(userptr, num_pages, flags, p, NULL);
>> +               else
>> +                       r = get_user_pages_remote(gtt->usertask,
>> +                                       mm, userptr, num_pages,
>> +                                       flags, p, NULL, NULL);
>>
>>                 spin_lock(&gtt->guptasklock);
>>                 list_del(&guptask.list);
>> @@ -746,12 +755,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>
>>         } while (pinned < ttm->num_pages);
>>
>> -       up_read(&current->mm->mmap_sem);
>> +       up_read(&mm->mmap_sem);
>>         return 0;
>>
>>  release_pages:
>>         release_pages(pages, pinned);
>> -       up_read(&current->mm->mmap_sem);
>> +       up_read(&mm->mmap_sem);
>>         return r;
>>  }
>>
>> @@ -972,6 +981,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
>>  {
>>         struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>
>> +       if (gtt->usertask)
>> +               put_task_struct(gtt->usertask);
>> +
>>         ttm_dma_tt_fini(&gtt->ttm);
>>         kfree(gtt);
>>  }
>> @@ -1072,8 +1084,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>                 return -EINVAL;
>>
>>         gtt->userptr = addr;
>> -       gtt->usermm = current->mm;
>>         gtt->userflags = flags;
>> +
>> +       if (gtt->usertask)
>> +               put_task_struct(gtt->usertask);
>> +       gtt->usertask = current->group_leader;
>> +       get_task_struct(gtt->usertask);
>> +
>>         spin_lock_init(&gtt->guptasklock);
>>         INIT_LIST_HEAD(&gtt->guptasks);
>>         atomic_set(&gtt->mmu_invalidations, 0);
>> @@ -1089,7 +1106,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
>>         if (gtt == NULL)
>>                 return NULL;
>>
>> -       return gtt->usermm;
>> +       if (gtt->usertask == NULL)
>> +               return NULL;
>> +
>> +       return gtt->usertask->mm;
>>  }
>>
>>  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>> --
>> 2.7.4
>>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/6] drm/amdkfd: GFP_NOIO while holding locks taken in MMU notifier
       [not found]     ` <1521833553-31571-5-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
@ 2018-05-11  7:59       ` Oded Gabbay
       [not found]         ` <CAFCwf13=swLVZ-FeHtsV9wUxpbA0ghZ2enSLvfZG8vA4g_Jc3Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Oded Gabbay @ 2018-05-11  7:59 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: amd-gfx list

On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
> When an MMU notifier runs in memory reclaim context, it can deadlock
> trying to take locks that are already held in the thread causing the
> memory reclaim. The solution is to avoid memory reclaim while holding
> locks that are taken in MMU notifiers by using GFP_NOIO.

Which locks are problematic ?

The kernel recommendation is to use "memalloc_noio_{save,restore} to
mark the whole scope which cannot perform any IO with a short
explanation why"

By using the scope functions, you protect against future allocation
code that will be written in the critical path, without relying on
each developer remembering to use the correct GFP_NOIO flag.

Oded

>
> This commit fixes memory allocations done while holding the dqm->lock
> which is needed in the MMU notifier (dqm->ops.evict_process_queues).
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c          | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 334669996..0434f65 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -652,7 +652,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
>         if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
>                 return -ENOMEM;
>
> -       *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
> +       *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
>         if ((*mem_obj) == NULL)
>                 return -ENOMEM;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> index c00c325..2bc49c6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> @@ -412,7 +412,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
>         if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
>                 return NULL;
>
> -       mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
> +       mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
>         if (!mqd)
>                 return NULL;
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> index 89e4242..481307b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> @@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
>         if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
>                 return NULL;
>
> -       mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
> +       mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
>         if (!mqd)
>                 return NULL;
>
> --
> 2.7.4
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 0/6] Userptr memory mapping support for KFD
       [not found] ` <1521833553-31571-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
                     ` (5 preceding siblings ...)
  2018-03-23 19:32   ` [PATCH 6/6] drm/amdgpu: Add userptr support for KFD Felix Kuehling
@ 2018-05-11  8:27   ` Oded Gabbay
  6 siblings, 0 replies; 16+ messages in thread
From: Oded Gabbay @ 2018-05-11  8:27 UTC (permalink / raw)
  To: Felix Kuehling; +Cc: amd-gfx list

On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
> Update of remaining patches from the GPUVM patch series. This should apply
> on top of the fixes I just sent out.
>
> Felix Kuehling (6):
>   drm/amdgpu: Add MMU notifier type for KFD userptr
>   drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads
>   drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier
>   drm/amdkfd: GFP_NOIO while holding locks taken in MMU notifier
>   drm/amdkfd: Add quiesce_mm and resume_mm to kgd2kfd_calls
>   drm/amdgpu: Add userptr support for KFD
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h       |  12 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 572 ++++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c           |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c           | 111 ++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h           |  11 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c          |  38 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c          |  40 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_module.c          |   2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c |   2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  |   2 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h            |   4 +
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c         |  10 +-
>  drivers/gpu/drm/amd/include/kgd_kfd_interface.h  |   6 +
>  13 files changed, 746 insertions(+), 66 deletions(-)
>
> --
> 2.7.4
>

Hi Felix,

Patch 1, 5 & 6 are reviewed by me.
Patches 2 & 3 are fine but I would like approval from Alex/Christian
Patch 4 - I think we can do better. See my comments there.

Thanks,
Oded
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 3/6] drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier
       [not found]         ` <CAFCwf13ihv88TGtSWXovmOawkcboMRuVOxih-R_-CnJC5h1=Qw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2018-05-11  9:06           ` Christian König
  0 siblings, 0 replies; 16+ messages in thread
From: Christian König @ 2018-05-11  9:06 UTC (permalink / raw)
  To: Oded Gabbay, Felix Kuehling, Alex Deucher; +Cc: amd-gfx list

Hi Oded,

it looks sane to me and is Reviewed-by: Christian König 
<christian.koenig@amd.com>.

Christian.

Am 11.05.2018 um 09:38 schrieb Oded Gabbay:
> Hi Felix,
> Same as patch 2. ACK but I want an extra review from amdgpu people.
> Thanks,
> Oded
>
> On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
>> When an MMU notifier runs in memory reclaim context, it can deadlock
>> trying to take locks that are already held in the thread causing the
>> memory reclaim. The solution is to avoid memory reclaim while holding
>> locks that are taken in MMU notifiers.
>>
>> This commit fixes kmalloc while holding rmn->lock by moving the call
>> outside the lock. The GFX MMU notifier also locks reservation objects.
>> I have no good solution for avoiding reclaim while holding reservation
>> objects. The HSA MMU notifier will not lock any reservation objects.
>>
>> v2: Moved allocation outside lock instead of using GFP_NOIO
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 17 +++++++++--------
>>   1 file changed, 9 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index f2ed18e..83e344f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -380,7 +380,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>          enum amdgpu_mn_type type =
>>                  bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
>>          struct amdgpu_mn *rmn;
>> -       struct amdgpu_mn_node *node = NULL;
>> +       struct amdgpu_mn_node *node = NULL, *new_node;
>>          struct list_head bos;
>>          struct interval_tree_node *it;
>>
>> @@ -388,6 +388,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>          if (IS_ERR(rmn))
>>                  return PTR_ERR(rmn);
>>
>> +       new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
>> +       if (!new_node)
>> +               return -ENOMEM;
>> +
>>          INIT_LIST_HEAD(&bos);
>>
>>          down_write(&rmn->lock);
>> @@ -401,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>                  list_splice(&node->bos, &bos);
>>          }
>>
>> -       if (!node) {
>> -               node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
>> -               if (!node) {
>> -                       up_write(&rmn->lock);
>> -                       return -ENOMEM;
>> -               }
>> -       }
>> +       if (!node)
>> +               node = new_node;
>> +       else
>> +               kfree(new_node);
>>
>>          bo->mn = rmn;
>>
>> --
>> 2.7.4
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads
       [not found]         ` <CAFCwf114qcOrKVOrt0JvxShvXdA8tdAKDCSHDK0DAJjM5rAQjg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2018-05-11  7:39           ` Oded Gabbay
@ 2018-05-11 18:11           ` Alex Deucher
  1 sibling, 0 replies; 16+ messages in thread
From: Alex Deucher @ 2018-05-11 18:11 UTC (permalink / raw)
  To: Oded Gabbay; +Cc: Felix Kuehling, amd-gfx list

On Fri, May 11, 2018 at 3:36 AM, Oded Gabbay <oded.gabbay@gmail.com> wrote:
> Hi Felix,
> The patch looks fine to me and I can ACK it, but I would prefer that
> Alex or Christian will review it as well before pushing it.

Not my area of expertise, but it looks correct to me.
Acked-by: Alex Deucher <alexander.deucher@amd.com>

> Thanks,
> Oded
>
> On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
>> This commit allows amdgpu_ttm_tt_get_user_pages to work in a worker
>> thread rather than regular process context. This will be used when
>> KFD userptr BOs are restored after an MMU-notifier eviction.
>>
>> v2: Manage task reference with get_task_struct/put_task_struct
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++++++++++++++++++++++++--------
>>  1 file changed, 29 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c2fae04..25490fe 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -689,7 +689,7 @@ struct amdgpu_ttm_tt {
>>         struct ttm_dma_tt       ttm;
>>         u64                     offset;
>>         uint64_t                userptr;
>> -       struct mm_struct        *usermm;
>> +       struct task_struct      *usertask;
>>         uint32_t                userflags;
>>         spinlock_t              guptasklock;
>>         struct list_head        guptasks;
>> @@ -700,14 +700,18 @@ struct amdgpu_ttm_tt {
>>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>  {
>>         struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +       struct mm_struct *mm = gtt->usertask->mm;
>>         unsigned int flags = 0;
>>         unsigned pinned = 0;
>>         int r;
>>
>> +       if (!mm) /* Happens during process shutdown */
>> +               return -ESRCH;
>> +
>>         if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
>>                 flags |= FOLL_WRITE;
>>
>> -       down_read(&current->mm->mmap_sem);
>> +       down_read(&mm->mmap_sem);
>>
>>         if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>>                 /* check that we only use anonymous memory
>> @@ -715,9 +719,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>                 unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>>                 struct vm_area_struct *vma;
>>
>> -               vma = find_vma(gtt->usermm, gtt->userptr);
>> +               vma = find_vma(mm, gtt->userptr);
>>                 if (!vma || vma->vm_file || vma->vm_end < end) {
>> -                       up_read(&current->mm->mmap_sem);
>> +                       up_read(&mm->mmap_sem);
>>                         return -EPERM;
>>                 }
>>         }
>> @@ -733,7 +737,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>                 list_add(&guptask.list, &gtt->guptasks);
>>                 spin_unlock(&gtt->guptasklock);
>>
>> -               r = get_user_pages(userptr, num_pages, flags, p, NULL);
>> +               if (mm == current->mm)
>> +                       r = get_user_pages(userptr, num_pages, flags, p, NULL);
>> +               else
>> +                       r = get_user_pages_remote(gtt->usertask,
>> +                                       mm, userptr, num_pages,
>> +                                       flags, p, NULL, NULL);
>>
>>                 spin_lock(&gtt->guptasklock);
>>                 list_del(&guptask.list);
>> @@ -746,12 +755,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>
>>         } while (pinned < ttm->num_pages);
>>
>> -       up_read(&current->mm->mmap_sem);
>> +       up_read(&mm->mmap_sem);
>>         return 0;
>>
>>  release_pages:
>>         release_pages(pages, pinned);
>> -       up_read(&current->mm->mmap_sem);
>> +       up_read(&mm->mmap_sem);
>>         return r;
>>  }
>>
>> @@ -972,6 +981,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
>>  {
>>         struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>
>> +       if (gtt->usertask)
>> +               put_task_struct(gtt->usertask);
>> +
>>         ttm_dma_tt_fini(&gtt->ttm);
>>         kfree(gtt);
>>  }
>> @@ -1072,8 +1084,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>                 return -EINVAL;
>>
>>         gtt->userptr = addr;
>> -       gtt->usermm = current->mm;
>>         gtt->userflags = flags;
>> +
>> +       if (gtt->usertask)
>> +               put_task_struct(gtt->usertask);
>> +       gtt->usertask = current->group_leader;
>> +       get_task_struct(gtt->usertask);
>> +
>>         spin_lock_init(&gtt->guptasklock);
>>         INIT_LIST_HEAD(&gtt->guptasks);
>>         atomic_set(&gtt->mmu_invalidations, 0);
>> @@ -1089,7 +1106,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
>>         if (gtt == NULL)
>>                 return NULL;
>>
>> -       return gtt->usermm;
>> +       if (gtt->usertask == NULL)
>> +               return NULL;
>> +
>> +       return gtt->usertask->mm;
>>  }
>>
>>  bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>> --
>> 2.7.4
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/6] drm/amdkfd: GFP_NOIO while holding locks taken in MMU notifier
       [not found]         ` <CAFCwf13=swLVZ-FeHtsV9wUxpbA0ghZ2enSLvfZG8vA4g_Jc3Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2018-05-14 16:07           ` Felix Kuehling
       [not found]             ` <1f89f053-4d78-3a13-aa28-3826d858182b-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 16+ messages in thread
From: Felix Kuehling @ 2018-05-14 16:07 UTC (permalink / raw)
  To: Oded Gabbay; +Cc: amd-gfx list

On 2018-05-11 03:59 AM, Oded Gabbay wrote:
> On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling <Felix.Kuehling@amd.com> wrote:
>> When an MMU notifier runs in memory reclaim context, it can deadlock
>> trying to take locks that are already held in the thread causing the
>> memory reclaim. The solution is to avoid memory reclaim while holding
>> locks that are taken in MMU notifiers by using GFP_NOIO.
> Which locks are problematic ?

The only lock I need to take in our MMU notifier is the DQM lock.

>
> The kernel recommendation is to use "memalloc_noio_{save,restore} to
> mark the whole scope which cannot perform any IO with a short
> explanation why"

Yeah. Looking at it more, I think the correct one to use is actually
memalloc_nofs_{save,restore}.

>
> By using the scope functions, you protect against future allocation
> code that will be written in the critical path, without worrying about
> the developer using the correct GFP_NOIO flag.

Yes. Last time I looked into this it was broken and didn't properly
handle kmalloc allocations. It looks like this was fixed by this commit:

    commit 6d7225f0cc1a1fc32cf5dd01b4ab4b8a34c7cdb4
    Author: Nikolay Borisov <nborisov@suse.com>
    Date:   Wed May 3 14:53:05 2017 -0700

        lockdep: teach lockdep about memalloc_noio_save


Later NOFS was introduced, which is now used by the lockdep checker to
detect reclaim deadlocks.

Regards,
  Felix

>
> Oded
>
>> This commit fixes memory allocations done while holding the dqm->lock
>> which is needed in the MMU notifier (dqm->ops.evict_process_queues).
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_device.c          | 2 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  | 2 +-
>>  3 files changed, 3 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 334669996..0434f65 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -652,7 +652,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
>>         if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
>>                 return -ENOMEM;
>>
>> -       *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
>> +       *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
>>         if ((*mem_obj) == NULL)
>>                 return -ENOMEM;
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
>> index c00c325..2bc49c6 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
>> @@ -412,7 +412,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
>>         if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
>>                 return NULL;
>>
>> -       mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
>> +       mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
>>         if (!mqd)
>>                 return NULL;
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
>> index 89e4242..481307b 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
>> @@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
>>         if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
>>                 return NULL;
>>
>> -       mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
>> +       mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
>>         if (!mqd)
>>                 return NULL;
>>
>> --
>> 2.7.4
>>

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 4/6] drm/amdkfd: GFP_NOIO while holding locks taken in MMU notifier
       [not found]             ` <1f89f053-4d78-3a13-aa28-3826d858182b-5C7GfCeVMHo@public.gmane.org>
@ 2018-05-14 17:20               ` Oded Gabbay
  0 siblings, 0 replies; 16+ messages in thread
From: Oded Gabbay @ 2018-05-14 17:20 UTC (permalink / raw)
  To: Kuehling, Felix; +Cc: amd-gfx list


[-- Attachment #1.1: Type: text/plain, Size: 4090 bytes --]

Cool, thanks!

On Mon, 14 May 2018, 19:07 Felix Kuehling <felix.kuehling-5C7GfCeVMHo@public.gmane.org> wrote:

> On 2018-05-11 03:59 AM, Oded Gabbay wrote:
> > On Fri, Mar 23, 2018 at 10:32 PM, Felix Kuehling <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
> wrote:
> >> When an MMU notifier runs in memory reclaim context, it can deadlock
> >> trying to take locks that are already held in the thread causing the
> >> memory reclaim. The solution is to avoid memory reclaim while holding
> >> locks that are taken in MMU notifiers by using GFP_NOIO.
> > Which locks are problematic ?
>
> The only lock I need to take in our MMU notifier is the DQM lock.
>
> >
> > The kernel recommendation is to use "memalloc_noio_{save,restore} to
> > mark the whole scope which cannot perform any IO with a short
> > explanation why"
>
> Yeah. Looking at it more, I think the correct one to use is actually
> memalloc_nofs_{save,restore}.
>
> >
> > By using the scope functions, you protect against future allocation
> > code that will be written in the critical path, without worrying about
> > the developer using the correct GFP_NOIO flag.
>
> Yes. Last time I looked into this it was broken and didn't properly
> handle kmalloc allocations. It looks like this was fixed by this commit:
>
>     commit 6d7225f0cc1a1fc32cf5dd01b4ab4b8a34c7cdb4
>     Author: Nikolay Borisov <nborisov-IBi9RG/b67k@public.gmane.org>
>     Date:   Wed May 3 14:53:05 2017 -0700
>
>         lockdep: teach lockdep about memalloc_noio_save
>
>
> Later NOFS was introduced, which is now used by the lockdep checker to
> detect reclaim deadlocks.
>
> Regards,
>   Felix
>
> >
> > Oded
> >
> >> This commit fixes memory allocations done while holding the dqm->lock
> >> which is needed in the MMU notifier (dqm->ops.evict_process_queues).
> >>
> >> Signed-off-by: Felix Kuehling <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
> >> ---
> >>  drivers/gpu/drm/amd/amdkfd/kfd_device.c          | 2 +-
> >>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +-
> >>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  | 2 +-
> >>  3 files changed, 3 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> index 334669996..0434f65 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> >> @@ -652,7 +652,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd,
> unsigned int size,
> >>         if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
> >>                 return -ENOMEM;
> >>
> >> -       *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
> >> +       *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
> >>         if ((*mem_obj) == NULL)
> >>                 return -ENOMEM;
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> >> index c00c325..2bc49c6 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> >> @@ -412,7 +412,7 @@ struct mqd_manager *mqd_manager_init_cik(enum
> KFD_MQD_TYPE type,
> >>         if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
> >>                 return NULL;
> >>
> >> -       mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
> >> +       mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
> >>         if (!mqd)
> >>                 return NULL;
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> >> index 89e4242..481307b 100644
> >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> >> @@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum
> KFD_MQD_TYPE type,
> >>         if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
> >>                 return NULL;
> >>
> >> -       mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
> >> +       mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
> >>         if (!mqd)
> >>                 return NULL;
> >>
> >> --
> >> 2.7.4
> >>
>
>

[-- Attachment #1.2: Type: text/html, Size: 5425 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-05-14 17:20 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-23 19:32 [PATCH 0/6] Userptr memory mapping support for KFD Felix Kuehling
     [not found] ` <1521833553-31571-1-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2018-03-23 19:32   ` [PATCH 1/6] drm/amdgpu: Add MMU notifier type for KFD userptr Felix Kuehling
2018-03-23 19:32   ` [PATCH 2/6] drm/amdgpu: Enable amdgpu_ttm_tt_get_user_pages in worker threads Felix Kuehling
     [not found]     ` <1521833553-31571-3-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2018-05-11  7:36       ` Oded Gabbay
     [not found]         ` <CAFCwf114qcOrKVOrt0JvxShvXdA8tdAKDCSHDK0DAJjM5rAQjg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2018-05-11  7:39           ` Oded Gabbay
2018-05-11 18:11           ` Alex Deucher
2018-03-23 19:32   ` [PATCH 3/6] drm/amdgpu: Avoid reclaim while holding locks taken in MMU notifier Felix Kuehling
     [not found]     ` <1521833553-31571-4-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2018-05-11  7:38       ` Oded Gabbay
     [not found]         ` <CAFCwf13ihv88TGtSWXovmOawkcboMRuVOxih-R_-CnJC5h1=Qw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2018-05-11  9:06           ` Christian König
2018-03-23 19:32   ` [PATCH 4/6] drm/amdkfd: GFP_NOIO " Felix Kuehling
     [not found]     ` <1521833553-31571-5-git-send-email-Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
2018-05-11  7:59       ` Oded Gabbay
     [not found]         ` <CAFCwf13=swLVZ-FeHtsV9wUxpbA0ghZ2enSLvfZG8vA4g_Jc3Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2018-05-14 16:07           ` Felix Kuehling
     [not found]             ` <1f89f053-4d78-3a13-aa28-3826d858182b-5C7GfCeVMHo@public.gmane.org>
2018-05-14 17:20               ` Oded Gabbay
2018-03-23 19:32   ` [PATCH 5/6] drm/amdkfd: Add quiesce_mm and resume_mm to kgd2kfd_calls Felix Kuehling
2018-03-23 19:32   ` [PATCH 6/6] drm/amdgpu: Add userptr support for KFD Felix Kuehling
2018-05-11  8:27   ` [PATCH 0/6] Userptr memory mapping " Oded Gabbay

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.