* [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
@ 2018-12-06 21:02 Yang, Philip
       [not found] ` <20181206210235.12036-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Yang, Philip @ 2018-12-06 21:02 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip

Replace our MMU notifier with the hmm_mirror_ops.sync_cpu_device_pagetables
callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as dependencies of the
DRM_AMDGPU_USERPTR Kconfig option.

It supports both KFD userptr and gfx userptr paths.

The dependent HMM patchset from Jérôme Glisse is now merged into the 4.20
kernel.
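
For reference, the shape of the new code path is roughly as follows. This is a
minimal sketch distilled from the hunks below; the my_* names are placeholders
and error handling is omitted.

  #include <linux/hmm.h>

  /* Called by HMM when the CPU page tables of the mirrored mm change. */
  static int my_sync_pagetables(struct hmm_mirror *mirror,
                                const struct hmm_update *update)
  {
          /* invalidate device access to [update->start, update->end) */
          return 0;
  }

  /* Called by HMM when the mirrored mm is torn down. */
  static void my_release(struct hmm_mirror *mirror)
  {
  }

  static const struct hmm_mirror_ops my_mirror_ops = {
          .sync_cpu_device_pagetables = my_sync_pagetables,
          .release = my_release,
  };

  static struct hmm_mirror my_mirror;

  static int my_register(struct mm_struct *mm)
  {
          my_mirror.ops = &my_mirror_ops;
          /* replaces __mmu_notifier_register(); undone with hmm_mirror_unregister() */
          return hmm_mirror_register(&my_mirror, mm);
  }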

Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
 drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
 4 files changed, 55 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9221e5489069..960a63355705 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
 config DRM_AMDGPU_USERPTR
 	bool "Always enable userptr write support"
 	depends on DRM_AMDGPU
-	select MMU_NOTIFIER
+	select HMM_MIRROR
 	help
-	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
-	  selected to enabled full userptr support.
+	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if they
+	  aren't already selected to enable full userptr support.
 
 config DRM_AMDGPU_GART_DEBUGFS
 	bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index f76bcb9c45e4..675efc850ff4 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -172,7 +172,7 @@ endif
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
 
 include $(FULL_AMD_PATH)/powerplay/Makefile
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e55508b39496..5d518d2bb9be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
 #include <linux/interval_tree.h>
 #include <drm/drmP.h>
 #include <drm/drm.h>
@@ -58,7 +58,6 @@
  *
  * @adev: amdgpu device pointer
  * @mm: process address space
- * @mn: MMU notifier structure
  * @type: type of MMU notifier
  * @work: destruction work item
  * @node: hash table node to find structure by adev and mn
@@ -66,6 +65,7 @@
  * @objects: interval tree containing amdgpu_mn_nodes
  * @read_lock: mutex for recursive locking of @lock
  * @recursion: depth of recursion
+ * @mirror: HMM mirror function support
  *
  * Data for each amdgpu device and process address space.
  */
@@ -73,7 +73,6 @@ struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
-	struct mmu_notifier	mn;
 	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
@@ -87,6 +86,9 @@ struct amdgpu_mn {
 	struct rb_root_cached	objects;
 	struct mutex		read_lock;
 	atomic_t		recursion;
+
+	/* HMM mirror */
+	struct hmm_mirror	mirror;
 };
 
 /**
@@ -103,7 +105,7 @@ struct amdgpu_mn_node {
 };
 
 /**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
  *
  * @work: previously sheduled work item
  *
@@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	}
 	up_write(&amn->lock);
 	mutex_unlock(&adev->mn_lock);
-	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+	hmm_mirror_unregister(&amn->mirror);
 	kfree(amn);
 }
 
 /**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
  *
- * Shedule a work item to lazy destroy our notifier.
+ * Schedule a work item to lazily destroy the HMM mirror.
  */
-static void amdgpu_mn_release(struct mmu_notifier *mn,
-			      struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
 
 	INIT_WORK(&amn->work, amdgpu_mn_destroy);
 	schedule_work(&amn->work);
 }
 
-
 /**
  * amdgpu_mn_lock - take the write side lock for this notifier
  *
@@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 /**
  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
@@ -278,28 +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 		amdgpu_mn_invalidate_node(node, start, end);
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
 /**
  * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * We temporarily evict all BOs between start and end. This
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
@@ -326,59 +324,39 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 
 			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
 							 start, end))
-				amdgpu_amdkfd_evict_userptr(mem, mm);
+				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
 		}
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
  */
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
-					   struct mm_struct *mm,
-					   unsigned long start,
-					   unsigned long end)
-{
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
-	amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 	[AMDGPU_MN_TYPE_GFX] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+		.release = amdgpu_hmm_mirror_release
 	},
 	[AMDGPU_MN_TYPE_HSA] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+		.release = amdgpu_hmm_mirror_release
 	},
 };
 
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
 /**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
  *
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
  */
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
@@ -408,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 	amn->mm = mm;
 	init_rwsem(&amn->lock);
 	amn->type = type;
-	amn->mn.ops = &amdgpu_mn_ops[type];
 	amn->objects = RB_ROOT_CACHED;
 	mutex_init(&amn->read_lock);
 	atomic_set(&amn->recursion, 0);
 
-	r = __mmu_notifier_register(&amn->mn, mm);
+	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+	r = hmm_mirror_register(&amn->mirror, mm);
 	if (r)
 		goto free_amn;
 
@@ -439,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
  * @bo: amdgpu buffer object
  * @addr: userptr addr we should monitor
  *
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
  * Returns 0 on success, -ERRNO if anything goes wrong.
  */
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -495,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 }
 
 /**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
  *
  * @bo: amdgpu buffer object
  *
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
  */
 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..0a51fd00021c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_HSA,
 };
 
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM_MIRROR)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
-- 
2.17.1

* [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency
       [not found] ` <20181206210235.12036-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-06 21:02   ` Yang, Philip
  2018-12-06 21:02   ` [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v3 Yang, Philip
  2018-12-07 12:00   ` [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6 Zhou, David(ChunMing)
  2 siblings, 0 replies; 13+ messages in thread
From: Yang, Philip @ 2018-12-06 21:02 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip

There is a circular lock dependency between the gfx and kfd paths with the HMM change:
lock(dqm) -> bo::reserve -> amdgpu_mn_lock

To avoid this, move init/uninit_mqd() out of lock(dqm) to remove the nested
locking between mmap_sem and bo::reserve. The resulting locking order
is: bo::reserve -> amdgpu_mn_lock(p->mn)
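
Roughly, create_queue_cpsch() changes as follows (condensed from the diff
below; unrelated details elided). destroy_queue_cpsch() and
process_termination_cpsch() get the equivalent treatment, calling
uninit_mqd() only after dqm_unlock().

  /* before: bo::reserve (inside init_mqd) nested under lock(dqm) */
  dqm_lock(dqm);
  mqd_mgr = dqm->ops.get_mqd_manager(dqm, ...);
  retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
                             &q->gart_mqd_addr, &q->properties);
  ...
  dqm_unlock(dqm);

  /* after: init_mqd() and its BO reservation happen before taking dqm */
  mqd_mgr = dqm->ops.get_mqd_manager(dqm, ...);
  retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
                             &q->gart_mqd_addr, &q->properties);
  if (retval)
          goto out;
  dqm_lock(dqm);
  ...
  dqm_unlock(dqm);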

Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 67 ++++++++++---------
 1 file changed, 36 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..fe120cc0930c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1158,6 +1158,33 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
 	retval = 0;
 
+	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
+	 * lock(dqm) -> bo::reserve
+	 */
+	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
+			get_mqd_type_from_queue_type(q->properties.type));
+
+	if (!mqd_mgr) {
+		retval = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Eviction state logic: we only mark active queues as evicted
+	 * to avoid the overhead of restoring inactive queues later
+	 */
+	if (qpd->evicted)
+		q->properties.is_evicted = (q->properties.queue_size > 0 &&
+					    q->properties.queue_percent > 0 &&
+					    q->properties.queue_address != 0);
+	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+	q->properties.tba_addr = qpd->tba_addr;
+	q->properties.tma_addr = qpd->tma_addr;
+	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
+				&q->gart_mqd_addr, &q->properties);
+	if (retval)
+		goto out;
+
 	dqm_lock(dqm);
 
 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
@@ -1181,30 +1208,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	if (retval)
 		goto out_deallocate_sdma_queue;
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-
-	if (!mqd_mgr) {
-		retval = -ENOMEM;
-		goto out_deallocate_doorbell;
-	}
-	/*
-	 * Eviction state logic: we only mark active queues as evicted
-	 * to avoid the overhead of restoring inactive queues later
-	 */
-	if (qpd->evicted)
-		q->properties.is_evicted = (q->properties.queue_size > 0 &&
-					    q->properties.queue_percent > 0 &&
-					    q->properties.queue_address != 0);
-
-	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
-	q->properties.tba_addr = qpd->tba_addr;
-	q->properties.tma_addr = qpd->tma_addr;
-	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
-				&q->gart_mqd_addr, &q->properties);
-	if (retval)
-		goto out_deallocate_doorbell;
 
 	list_add(&q->list, &qpd->queues_list);
 	qpd->queue_count++;
@@ -1228,14 +1231,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	dqm_unlock(dqm);
 	return retval;
 
-out_deallocate_doorbell:
-	deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		deallocate_sdma_queue(dqm, q->sdma_id);
 out_unlock:
 	dqm_unlock(dqm);
-
+out:
 	return retval;
 }
 
@@ -1398,8 +1399,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 			qpd->reset_wavefronts = true;
 	}
 
-	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
 	 * type
@@ -1410,6 +1409,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
 	dqm_unlock(dqm);
 
+	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
 	return retval;
 
 failed:
@@ -1631,7 +1633,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		qpd->reset_wavefronts = false;
 	}
 
-	/* lastly, free mqd resources */
+	dqm_unlock(dqm);
+
+	/* Lastly, free mqd resources.
+	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+	 */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
 		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
@@ -1645,7 +1651,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 	}
 
 out:
-	dqm_unlock(dqm);
 	return retval;
 }
 
-- 
2.17.1

* [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v3
       [not found] ` <20181206210235.12036-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
  2018-12-06 21:02   ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency Yang, Philip
@ 2018-12-06 21:02   ` Yang, Philip
       [not found]     ` <20181206210235.12036-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
  2018-12-07 12:00   ` [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6 Zhou, David(ChunMing)
  2 siblings, 1 reply; 13+ messages in thread
From: Yang, Philip @ 2018-12-06 21:02 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip

Use the HMM helper function hmm_vma_fault() to get the physical pages backing
a userptr and to start tracking CPU page table updates on those pages. Then
use hmm_vma_range_done() to check whether those pages were updated, before
amdgpu_cs_submit for gfx or before user queues are resumed for kfd.

If the userptr pages were updated, amdgpu_cs_ioctl restarts from scratch for
gfx, and the restore worker is rescheduled to retry for kfd.

HMM simplifies the concurrent CPU page table update check, so remove the
guptasklock, mmu_invalidations, and last_set_pages fields from the
amdgpu_ttm_tt struct.

HMM does not pin pages (increase the page refcount), so remove the related
operations such as release_pages(), put_page(), and mark_page_dirty().
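
For the gfx path, the resulting flow is roughly the following (condensed from
the amdgpu_ttm.c and amdgpu_cs.c hunks below; locking and error handling are
trimmed):

  /* amdgpu_ttm_tt_get_user_pages(): fault pages in and start HMM tracking */
  amdgpu_hmm_init_range(&gtt->range);
  gtt->range.vma   = find_vma(mm, gtt->userptr);
  gtt->range.pfns  = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), GFP_KERNEL);
  gtt->range.start = gtt->userptr;
  gtt->range.end   = gtt->userptr + ttm->num_pages * PAGE_SIZE;
  r = hmm_vma_fault(&gtt->range, true);
  for (i = 0; i < ttm->num_pages; i++)
          pages[i] = hmm_pfn_to_page(&gtt->range, gtt->range.pfns[i]);

  /* amdgpu_cs_submit(), under amdgpu_mn_lock(): stop tracking and check */
  if (!hmm_vma_range_done(&gtt->range)) {
          /* CPU page tables changed since the fault: return -ERESTARTSYS
           * and let amdgpu_cs_ioctl() restart from scratch */
  }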

Change-Id: Id2d3130378b44a774e0d77156102a20a203b5036
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   1 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  81 ++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 188 +++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  34 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 160 ++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   1 -
 11 files changed, 206 insertions(+), 289 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index bcf587b4ba98..b492dd9e541a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -62,7 +62,6 @@ struct kgd_mem {
 
 	atomic_t invalid;
 	struct amdkfd_process_info *process_info;
-	struct page **user_pages;
 
 	struct amdgpu_sync sync;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index b29ef088fa14..75833b40507b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -580,28 +580,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 		goto out;
 	}
 
-	/* If no restore worker is running concurrently, user_pages
-	 * should not be allocated
-	 */
-	WARN(mem->user_pages, "Leaking user_pages array");
-
-	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
-					   sizeof(struct page *),
-					   GFP_KERNEL | __GFP_ZERO);
-	if (!mem->user_pages) {
-		pr_err("%s: Failed to allocate pages array\n", __func__);
-		ret = -ENOMEM;
-		goto unregister_out;
-	}
-
-	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
 	if (ret) {
 		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
-		goto free_out;
+		goto unregister_out;
 	}
 
-	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-
 	ret = amdgpu_bo_reserve(bo, true);
 	if (ret) {
 		pr_err("%s: Failed to reserve BO\n", __func__);
@@ -614,11 +598,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 	amdgpu_bo_unreserve(bo);
 
 release_out:
-	if (ret)
-		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
-	kvfree(mem->user_pages);
-	mem->user_pages = NULL;
+	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 unregister_out:
 	if (ret)
 		amdgpu_mn_unregister(bo);
@@ -677,7 +657,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.num_shared = 1;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -741,7 +720,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.num_shared = 1;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	i = 0;
@@ -1329,15 +1307,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	list_del(&bo_list_entry->head);
 	mutex_unlock(&process_info->lock);
 
-	/* Free user pages if necessary */
-	if (mem->user_pages) {
-		pr_debug("%s: Freeing user_pages array\n", __func__);
-		if (mem->user_pages[0])
-			release_pages(mem->user_pages,
-					mem->bo->tbo.ttm->num_pages);
-		kvfree(mem->user_pages);
-	}
-
 	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
 	if (unlikely(ret))
 		return ret;
@@ -1751,25 +1720,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 
 		bo = mem->bo;
 
-		if (!mem->user_pages) {
-			mem->user_pages =
-				kvmalloc_array(bo->tbo.ttm->num_pages,
-						 sizeof(struct page *),
-						 GFP_KERNEL | __GFP_ZERO);
-			if (!mem->user_pages) {
-				pr_err("%s: Failed to allocate pages array\n",
-				       __func__);
-				return -ENOMEM;
-			}
-		} else if (mem->user_pages[0]) {
-			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-		}
-
 		/* Get updated user pages */
 		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
-						   mem->user_pages);
+						   bo->tbo.ttm->pages);
 		if (ret) {
-			mem->user_pages[0] = NULL;
+			bo->tbo.ttm->pages[0] = NULL;
 			pr_info("%s: Failed to get user pages: %d\n",
 				__func__, ret);
 			/* Pretend it succeeded. It will fail later
@@ -1778,12 +1733,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 			 * stalled user mode queues.
 			 */
 		}
-
-		/* Mark the BO as valid unless it was invalidated
-		 * again concurrently
-		 */
-		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
-			return -EAGAIN;
 	}
 
 	return 0;
@@ -1863,10 +1812,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 
 		bo = mem->bo;
 
-		/* Copy pages array and validate the BO if we got user pages */
-		if (mem->user_pages[0]) {
-			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
-						     mem->user_pages);
+		/* Validate the BO if we got user pages */
+		if (bo->tbo.ttm->pages[0]) {
 			amdgpu_bo_placement_from_domain(bo, mem->domain);
 			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (ret) {
@@ -1875,16 +1822,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 			}
 		}
 
-		/* Validate succeeded, now the BO owns the pages, free
-		 * our copy of the pointer array. Put this BO back on
-		 * the userptr_valid_list. If we need to revalidate
-		 * it, we need to start from scratch.
-		 */
-		kvfree(mem->user_pages);
-		mem->user_pages = NULL;
 		list_move_tail(&mem->validate_list.head,
 			       &process_info->userptr_valid_list);
 
+		/* Stop HMM from tracking the userptr update. We don't check the
+		 * return value for concurrent CPU page table updates because we will
+		 * reschedule the restore worker if process_info->evicted_bos
+		 * is updated.
+		 */
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
 		/* Update mapping. If the BO was not validated
 		 * (because we couldn't get user pages), this will
 		 * clear the page table entries, which will result in
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 5c79da8e1150..3842183970cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -200,8 +200,6 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 
 		if (!bo->parent)
 			list_add_tail(&e->tv.head, &bucket[priority]);
-
-		e->user_pages = NULL;
 	}
 
 	/* Connect the sorted buckets in the output list. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1601e6..4beab2de09b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -35,8 +35,7 @@ struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
-	struct page			**user_pages;
-	int				user_invalidated;
+	bool				user_invalidated;
 };
 
 struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 149b3065119b..4a3466300efc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.tv.bo = &bo->tbo;
 	/* One for TTM and one for the CS job */
 	p->uf_entry.tv.num_shared = 2;
-	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_put_unlocked(gobj);
 
@@ -532,24 +531,19 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 
 	list_for_each_entry(lobj, validated, tv.head) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
-		bool binding_userptr = false;
 		struct mm_struct *usermm;
 
 		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated) {
 			amdgpu_bo_placement_from_domain(bo,
 							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
-			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
-						     lobj->user_pages);
-			binding_userptr = true;
 		}
 
 		if (p->evictable == lobj)
@@ -558,11 +552,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;
-
-		if (binding_userptr) {
-			kvfree(lobj->user_pages);
-			lobj->user_pages = NULL;
-		}
 	}
 	return 0;
 }
@@ -577,7 +566,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
-	unsigned tries = 10;
+	struct list_head userpage_list;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -586,7 +575,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (cs->in.bo_list_handle) {
 		if (p->bo_list)
 			return -EINVAL;
-
 		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
 				       &p->bo_list);
 		if (r)
@@ -605,7 +593,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 	if (p->bo_list->first_userptr != p->bo_list->num_entries)
-		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+		p->mn = amdgpu_mn_get(current->mm, p->adev,
+					AMDGPU_MN_TYPE_GFX);
 
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -613,79 +602,72 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+				   &duplicates);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
+	}
 
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
-		}
+	INIT_LIST_HEAD(&userpage_list);
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		list_del(&e->tv.head);
+		list_add(&e->tv.head, &userpage_list);
+		amdgpu_bo_unreserve(bo);
+	}
 
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
+	/* Get userptr backing pages. If pages are updated after registered
+	 * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+	 * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+	 */
+	if (!list_empty(&userpage_list)) {
+		/* Unreserve everything again, to avoid circular locking case
+		 * bo::reserve -> mmap->sem
+		 */
+		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
-					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
+		list_for_each_entry(e, &userpage_list, tv.head) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+			bool userpage_invalidated = false;
+			struct page **pages;
+			int i;
+
+			pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+						sizeof(*pages),
+						GFP_KERNEL | __GFP_ZERO);
+			if (!pages) {
+				DRM_ERROR("calloc failure\n");
+				return -ENOMEM;
 			}
 
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(bo);
+			r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, pages);
+			if (r) {
+				kvfree(pages);
+				return r;
 			}
-		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
+			for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+				if (bo->tbo.ttm->pages[i] != pages[i]) {
+					bo->tbo.ttm->pages[i] = pages[i];
+					userpage_invalidated = true;
+				}
+			}
+			e->user_invalidated = userpage_invalidated;
+			kvfree(pages);
 		}
 
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->tv.bo->ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
-							 sizeof(struct page*),
-							 GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
+		list_splice(&userpage_list, &p->validated);
 
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
-			}
+		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+					   &duplicates);
+		if (unlikely(r != 0)) {
+			if (r != -ERESTARTSYS)
+				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+			goto out;
 		}
-
-		/* And try again. */
-		list_splice(&need_pages, &p->validated);
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -754,17 +736,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 error_validate:
 	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
 	return r;
 }
 
@@ -1213,8 +1185,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
-
-	int r;
+	int r = 0;
 
 	job = p->job;
 	p->job = NULL;
@@ -1223,15 +1194,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
+	/* No memory allocation is allowed while holding the mn lock.
+	 * p->mn is held until amdgpu_cs_submit is finished and the fence is added
+	 * to BOs.
+	 */
 	amdgpu_mn_lock(p->mn);
+
+	/* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+		r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -ERESTARTSYS;
+		goto error_abort;
 	}
 
 	job->owner = p->filp;
@@ -1278,14 +1255,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	union drm_amdgpu_cs *cs = data;
-	struct amdgpu_cs_parser parser = {};
-	bool reserved_buffers = false;
+	union drm_amdgpu_cs *cs;
+	struct amdgpu_cs_parser parser;
+	bool reserved_buffers;
+	int tries = 10;
 	int i, r;
 
 	if (!adev->accel_working)
 		return -EBUSY;
 
+restart:
+	memset(&parser, 0, sizeof(parser));
+	cs = data;
+	reserved_buffers = false;
+
 	parser.adev = adev;
 	parser.filp = filp;
 
@@ -1327,6 +1310,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
+	if (r == -ERESTARTSYS) {
+		if (!--tries) {
+			DRM_ERROR("Possible deadlock? Retry too many times\n");
+			return -EDEADLK;
+		}
+		goto restart;
+	}
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index f4f00217546e..92025993bfb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -336,26 +336,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 		r = amdgpu_bo_reserve(bo, true);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 
 		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		amdgpu_bo_unreserve(bo);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
-	/* drop reference from allocate - handle holds it now */
-	drm_gem_object_put_unlocked(gobj);
 	if (r)
-		return r;
+		goto user_pages_done;
 
 	args->handle = handle;
-	return 0;
 
-free_pages:
-	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
+user_pages_done:
+	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 
 release_object:
 	drm_gem_object_put_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 5d518d2bb9be..c8002ee1d415 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 			true, false, MAX_SCHEDULE_TIMEOUT);
 		if (r <= 0)
 			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 	}
 }
 
@@ -353,15 +351,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 /**
  * amdgpu_mn_get - create HMM mirror context
  *
+ * @mm: the mm struct
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a HMM mirror context for current->mm.
+ * Creates a HMM mirror context for mm.
  */
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
+				struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
 {
-	struct mm_struct *mm = current->mm;
 	struct amdgpu_mn *amn;
 	unsigned long key = AMDGPU_MN_KEY(mm, type);
 	int r;
@@ -431,7 +430,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 	struct list_head bos;
 	struct interval_tree_node *it;
 
-	amn = amdgpu_mn_get(adev, type);
+	amn = amdgpu_mn_get(current->mm, adev, type);
 	if (IS_ERR(amn))
 		return PTR_ERR(amn);
 
@@ -513,3 +512,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	mutex_unlock(&adev->mn_lock);
 }
 
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+		(1 << 0), /* HMM_PFN_VALID */
+		(1 << 1), /* HMM_PFN_WRITE */
+		0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+		0, /* HMM_PFN_NONE */
+		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+void amdgpu_hmm_init_range(struct hmm_range *range)
+{
+	if (range) {
+		range->flags = hmm_range_flags;
+		range->values = hmm_range_values;
+		range->pfn_shift = PAGE_SHIFT;
+		range->pfns = NULL;
+		INIT_LIST_HEAD(&range->list);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index 0a51fd00021c..4c5e86847a3b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -25,9 +25,10 @@
 #define __AMDGPU_MN_H__
 
 /*
- * MMU Notifier
+ * HMM mirror
  */
 struct amdgpu_mn;
+struct hmm_range;
 
 enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_GFX,
@@ -37,10 +38,12 @@ enum amdgpu_mn_type {
 #if defined(CONFIG_HMM_MIRROR)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
+				struct amdgpu_device *adev,
 				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+void amdgpu_hmm_init_range(struct hmm_range *range);
 #else
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c91ec3101d00..33057d6114c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
+#include <linux/hmm.h>
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
@@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 /*
  * TTM backend functions.
  */
-struct amdgpu_ttm_gup_task_list {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
@@ -718,107 +714,105 @@ struct amdgpu_ttm_tt {
 	uint32_t		userflags;
 	spinlock_t              guptasklock;
 	struct list_head        guptasks;
-	atomic_t		mmu_invalidations;
-	uint32_t		last_set_pages;
+	struct hmm_range	range;
 };
 
 /**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
  *
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
  */
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct mm_struct *mm = gtt->usertask->mm;
-	unsigned int flags = 0;
-	unsigned pinned = 0;
-	int r;
+	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+	struct hmm_range *range = &gtt->range;
+	int r, i;
 
 	if (!mm) /* Happens during process shutdown */
 		return -ESRCH;
 
-	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
-		flags |= FOLL_WRITE;
+	amdgpu_hmm_init_range(range);
 
 	down_read(&mm->mmap_sem);
 
+	range->vma = find_vma(mm, gtt->userptr);
 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
 		/*
 		 * check that we only use anonymous memory to prevent problems
 		 * with writeback
 		 */
-		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-		struct vm_area_struct *vma;
+		struct vm_area_struct *vma = range->vma;
 
-		vma = find_vma(mm, gtt->userptr);
-		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&mm->mmap_sem);
-			return -EPERM;
-		}
+		if (!vma || vma->vm_file || vma->vm_end < end)
+			range->vma = NULL;
 	}
 
-	/* loop enough times using contiguous pages of memory */
-	do {
-		unsigned num_pages = ttm->num_pages - pinned;
-		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **p = pages + pinned;
-		struct amdgpu_ttm_gup_task_list guptask;
-
-		guptask.task = current;
-		spin_lock(&gtt->guptasklock);
-		list_add(&guptask.list, &gtt->guptasks);
-		spin_unlock(&gtt->guptasklock);
+	if (!range->vma) {
+		r = -EPERM;
+		goto out;
+	}
 
-		if (mm == current->mm)
-			r = get_user_pages(userptr, num_pages, flags, p, NULL);
-		else
-			r = get_user_pages_remote(gtt->usertask,
-					mm, userptr, num_pages,
-					flags, p, NULL, NULL);
+	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
+				     GFP_KERNEL);
+	if (range->pfns == NULL) {
+		r = -ENOMEM;
+		goto out;
+	}
+	range->start = gtt->userptr;
+	range->end = end;
 
-		spin_lock(&gtt->guptasklock);
-		list_del(&guptask.list);
-		spin_unlock(&gtt->guptasklock);
+	for (i = 0; i < ttm->num_pages; i++) {
+		range->pfns[i] = range->flags[HMM_PFN_VALID];
+		range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
+					0 : range->flags[HMM_PFN_WRITE];
+	}
 
-		if (r < 0)
-			goto release_pages;
+	/* This may trigger page table update */
+	r = hmm_vma_fault(range, true);
+	if (r)
+		goto out_free_pfns;
 
-		pinned += r;
+	up_read(&mm->mmap_sem);
 
-	} while (pinned < ttm->num_pages);
+	for (i = 0; i < ttm->num_pages; i++)
+		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
 
-	up_read(&mm->mmap_sem);
 	return 0;
 
-release_pages:
-	release_pages(pages, pinned);
+out_free_pfns:
+	kvfree(range->pfns);
+	range->pfns = NULL;
+out:
 	up_read(&mm->mmap_sem);
 	return r;
 }
 
 /**
- * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
+ * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
  *
- * Called by amdgpu_cs_list_validate(). This creates the page list
- * that backs user memory and will ultimately be mapped into the device
- * address space.
+ * Returns: true if pages are invalidated since the last time they've been set
  */
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	unsigned i;
+	bool r;
 
-	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
-	for (i = 0; i < ttm->num_pages; ++i) {
-		if (ttm->pages[i])
-			put_page(ttm->pages[i]);
+	if (gtt == NULL || !gtt->userptr)
+		return false;
 
-		ttm->pages[i] = pages ? pages[i] : NULL;
+	r = hmm_vma_range_done(&gtt->range);
+
+	if (gtt->range.pfns) {
+		kvfree(gtt->range.pfns);
+		gtt->range.pfns = NULL;
 	}
+
+	return !r;
 }
 
 /**
@@ -903,9 +897,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 	/* unmap the pages mapped to the device */
 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
-	/* mark the pages as dirty */
-	amdgpu_ttm_tt_mark_user_pages(ttm);
-
 	sg_free_table(ttm->sg);
 }
 
@@ -1207,7 +1198,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
 	if (gtt && gtt->userptr) {
-		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
+		memset(ttm->pages, 0, ttm->num_pages * sizeof(struct page *));
 		kfree(ttm->sg);
 		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
 		return;
@@ -1258,8 +1249,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
-	atomic_set(&gtt->mmu_invalidations, 0);
-	gtt->last_set_pages = 0;
 
 	return 0;
 }
@@ -1289,7 +1278,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	struct amdgpu_ttm_gup_task_list *entry;
 	unsigned long size;
 
 	if (gtt == NULL || !gtt->userptr)
@@ -1302,48 +1290,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
-	/* Search the lists of tasks that hold this mapping and see
-	 * if current is one of them.  If it is return false.
-	 */
-	spin_lock(&gtt->guptasklock);
-	list_for_each_entry(entry, &gtt->guptasks, list) {
-		if (entry->task == current) {
-			spin_unlock(&gtt->guptasklock);
-			return false;
-		}
-	}
-	spin_unlock(&gtt->guptasklock);
-
-	atomic_inc(&gtt->mmu_invalidations);
-
 	return true;
 }
 
 /**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
  */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
-				       int *last_invalidated)
-{
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	int prev_invalidated = *last_invalidated;
-
-	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
-	return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
- */
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
 	if (gtt == NULL || !gtt->userptr)
 		return false;
 
-	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+	return true;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b5b2d101f7db..c5a1e78a09b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -102,7 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
-void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
@@ -112,7 +112,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated);
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index f059ec314f2b..ab8fa84b27a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -619,7 +619,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	entry->tv.bo = &vm->root.base.bo->tbo;
 	/* One for the VM updates, one for TTM and one for the CS job */
 	entry->tv.num_shared = 3;
-	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
 
-- 
2.17.1

* RE: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
       [not found] ` <20181206210235.12036-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
  2018-12-06 21:02   ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency Yang, Philip
  2018-12-06 21:02   ` [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v3 Yang, Philip
@ 2018-12-07 12:00   ` Zhou, David(ChunMing)
       [not found]     ` <BY1PR12MB0502882984A59380E5F17F66B4AA0-PicGAnIBOobrCwm+z9iKNgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  2 siblings, 1 reply; 13+ messages in thread
From: Zhou, David(ChunMing) @ 2018-12-07 12:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip

You could even rename amdgpu_mn.c/h to amdgpu_hmm.c/h.

-David

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Yang,
> Philip
> Sent: Friday, December 07, 2018 5:03 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yang, Philip <Philip.Yang@amd.com>
> Subject: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu
> notifier v6
> 
> Replace our MMU notifier with
> hmm_mirror_ops.sync_cpu_device_pagetables
> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a
> dependency in DRM_AMDGPU_USERPTR Kconfig.
> 
> It supports both KFD userptr and gfx userptr paths.
> 
> The depdent HMM patchset from Jérôme Glisse are all merged into 4.20.0
> kernel now.
> 
> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>  drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++++++++++-------------
> --
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>  4 files changed, 55 insertions(+), 77 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
> b/drivers/gpu/drm/amd/amdgpu/Kconfig
> index 9221e5489069..960a63355705 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK  config
> DRM_AMDGPU_USERPTR
>  	bool "Always enable userptr write support"
>  	depends on DRM_AMDGPU
> -	select MMU_NOTIFIER
> +	select HMM_MIRROR
>  	help
> -	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
> -	  selected to enabled full userptr support.
> +	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
> +	  isn't already selected to enabled full userptr support.
> 
>  config DRM_AMDGPU_GART_DEBUGFS
>  	bool "Allow GART access through debugfs"
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index f76bcb9c45e4..675efc850ff4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -172,7 +172,7 @@ endif
>  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
> 
>  include $(FULL_AMD_PATH)/powerplay/Makefile
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index e55508b39496..5d518d2bb9be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -45,7 +45,7 @@
> 
>  #include <linux/firmware.h>
>  #include <linux/module.h>
> -#include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
>  #include <linux/interval_tree.h>
>  #include <drm/drmP.h>
>  #include <drm/drm.h>
> @@ -58,7 +58,6 @@
>   *
>   * @adev: amdgpu device pointer
>   * @mm: process address space
> - * @mn: MMU notifier structure
>   * @type: type of MMU notifier
>   * @work: destruction work item
>   * @node: hash table node to find structure by adev and mn @@ -66,6 +65,7
> @@
>   * @objects: interval tree containing amdgpu_mn_nodes
>   * @read_lock: mutex for recursive locking of @lock
>   * @recursion: depth of recursion
> + * @mirror: HMM mirror function support
>   *
>   * Data for each amdgpu device and process address space.
>   */
> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>  	/* constant after initialisation */
>  	struct amdgpu_device	*adev;
>  	struct mm_struct	*mm;
> -	struct mmu_notifier	mn;
>  	enum amdgpu_mn_type	type;
> 
>  	/* only used on destruction */
> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>  	struct rb_root_cached	objects;
>  	struct mutex		read_lock;
>  	atomic_t		recursion;
> +
> +	/* HMM mirror */
> +	struct hmm_mirror	mirror;
>  };
> 
>  /**
> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {  };
> 
>  /**
> - * amdgpu_mn_destroy - destroy the MMU notifier
> + * amdgpu_mn_destroy - destroy the HMM mirror
>   *
>   * @work: previously sheduled work item
>   *
> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct
> *work)
>  	}
>  	up_write(&amn->lock);
>  	mutex_unlock(&adev->mn_lock);
> -	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
> +
> +	hmm_mirror_unregister(&amn->mirror);
>  	kfree(amn);
>  }
> 
>  /**
> - * amdgpu_mn_release - callback to notify about mm destruction
> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>   *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> + * @mirror: the HMM mirror (mm) this callback is about
>   *
> - * Shedule a work item to lazy destroy our notifier.
> + * Shedule a work item to lazy destroy HMM mirror.
>   */
> -static void amdgpu_mn_release(struct mmu_notifier *mn,
> -			      struct mm_struct *mm)
> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>  {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
> mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
> +mirror);
> 
>  	INIT_WORK(&amn->work, amdgpu_mn_destroy);
>  	schedule_work(&amn->work);
>  }
> 
> -
>  /**
>   * amdgpu_mn_lock - take the write side lock for this notifier
>   *
> @@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct
> amdgpu_mn_node *node,
>  /**
>   * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm
> change
>   *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
>   *
>   * Block for operations on BOs to finish and mark pages as accessed and
>   * potentially dirty.
>   */
> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
> -						 struct mm_struct *mm,
> -						 unsigned long start,
> -						 unsigned long end,
> -						 bool blockable)
> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
> +			const struct hmm_update *update)
>  {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
> mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
> mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>  	struct interval_tree_node *it;
> 
>  	/* notification is exclusive, but interval is inclusive */ @@ -278,28
> +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct
> mmu_notifier *mn,
>  		amdgpu_mn_invalidate_node(node, start, end);
>  	}
> 
> +	amdgpu_mn_read_unlock(amn);
> +
>  	return 0;
>  }
> 
>  /**
>   * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm
> change
>   *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
>   *
>   * We temporarily evict all BOs between start and end. This
>   * necessitates evicting all user-mode queues of the process. The BOs
>   * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>   */
> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
> -						 struct mm_struct *mm,
> -						 unsigned long start,
> -						 unsigned long end,
> -						 bool blockable)
> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
> +			const struct hmm_update *update)
>  {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>  	struct interval_tree_node *it;
> 
>  	/* notification is exclusive, but interval is inclusive */
> @@ -326,59 +324,39 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
> 
>  			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>  							 start, end))
> -				amdgpu_amdkfd_evict_userptr(mem, mm);
> +				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>  		}
>  	}
> 
> +	amdgpu_mn_read_unlock(amn);
> +
>  	return 0;
>  }
> 
> -/**
> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
> - *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> - *
> - * Release the lock again to allow new command submissions.
> +/* Low bits of any reasonable mm pointer will be unused due to struct
> + * alignment. Use these bits to make a unique key from the mm pointer
> + * and notifier type.
>   */
> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
> -					   struct mm_struct *mm,
> -					   unsigned long start,
> -					   unsigned long end)
> -{
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
> mn);
> -
> -	amdgpu_mn_read_unlock(amn);
> -}
> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
> 
> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>  	[AMDGPU_MN_TYPE_GFX] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
> +		.release = amdgpu_hmm_mirror_release
>  	},
>  	[AMDGPU_MN_TYPE_HSA] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
> +		.release = amdgpu_hmm_mirror_release
>  	},
>  };
> 
> -/* Low bits of any reasonable mm pointer will be unused due to struct
> - * alignment. Use these bits to make a unique key from the mm pointer
> - * and notifier type.
> - */
> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
> -
>  /**
> - * amdgpu_mn_get - create notifier context
> + * amdgpu_mn_get - create HMM mirror context
>   *
>   * @adev: amdgpu device pointer
>   * @type: type of MMU notifier context
>   *
> - * Creates a notifier context for current->mm.
> + * Creates a HMM mirror context for current->mm.
>   */
>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>  				enum amdgpu_mn_type type)
> @@ -408,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>  	amn->mm = mm;
>  	init_rwsem(&amn->lock);
>  	amn->type = type;
> -	amn->mn.ops = &amdgpu_mn_ops[type];
>  	amn->objects = RB_ROOT_CACHED;
>  	mutex_init(&amn->read_lock);
>  	atomic_set(&amn->recursion, 0);
> 
> -	r = __mmu_notifier_register(&amn->mn, mm);
> +	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
> +	r = hmm_mirror_register(&amn->mirror, mm);
>  	if (r)
>  		goto free_amn;
> 
> @@ -439,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>   * @bo: amdgpu buffer object
>   * @addr: userptr addr we should monitor
>   *
> - * Registers an MMU notifier for the given BO at the specified address.
> + * Registers an HMM mirror for the given BO at the specified address.
>   * Returns 0 on success, -ERRNO if anything goes wrong.
>   */
>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
> @@ -495,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>  }
> 
>  /**
> - * amdgpu_mn_unregister - unregister a BO for notifier updates
> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>   *
>   * @bo: amdgpu buffer object
>   *
> - * Remove any registration of MMU notifier updates from the buffer object.
> + * Remove any registration of HMM mirror updates from the buffer object.
>   */
>  void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>  {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index eb0f432f78fe..0a51fd00021c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>  	AMDGPU_MN_TYPE_HSA,
>  };
> 
> -#if defined(CONFIG_MMU_NOTIFIER)
> +#if defined(CONFIG_HMM_MIRROR)
>  void amdgpu_mn_lock(struct amdgpu_mn *mn);
>  void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>  struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> --
> 2.17.1
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v3
       [not found]     ` <20181206210235.12036-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-11  0:12       ` Kuehling, Felix
       [not found]         ` <b5ff9fa4-97c9-4524-d4dd-34c8003274fa-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Kuehling, Felix @ 2018-12-11  0:12 UTC (permalink / raw)
  To: Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This is a nice improvement from the last version. I still see some
potential problems. See inline ...

I'm skipping over the CS and GEM parts. I hope Christian can review
those parts.

On 2018-12-06 4:02 p.m., Yang, Philip wrote:
> Use HMM helper function hmm_vma_fault() to get physical pages backing
> userptr and start CPU page table update track of those pages. Then use
> hmm_vma_range_done() to check if those pages are updated before
> amdgpu_cs_submit for gfx or before user queues are resumed for kfd.
>
> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
> from scratch, for kfd, restore worker is rescheduled to retry.
>
> HMM simplify the CPU page table concurrent update check, so remove
> guptasklock, mmu_invalidations, last_set_pages fields from
> amdgpu_ttm_tt struct.
>
> HMM does not pin the page (increase page ref count), so remove related
> operations like release_pages(), put_page(), mark_page_dirty().
>
> Change-Id: Id2d3130378b44a774e0d77156102a20a203b5036
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   1 -
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  81 ++------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 188 +++++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  34 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   7 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 160 ++++++---------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   4 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   1 -
>  11 files changed, 206 insertions(+), 289 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index bcf587b4ba98..b492dd9e541a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -62,7 +62,6 @@ struct kgd_mem {
>  
>  	atomic_t invalid;
>  	struct amdkfd_process_info *process_info;
> -	struct page **user_pages;
>  
>  	struct amdgpu_sync sync;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index b29ef088fa14..75833b40507b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -580,28 +580,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>  		goto out;
>  	}
>  
> -	/* If no restore worker is running concurrently, user_pages
> -	 * should not be allocated
> -	 */
> -	WARN(mem->user_pages, "Leaking user_pages array");
> -
> -	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
> -					   sizeof(struct page *),
> -					   GFP_KERNEL | __GFP_ZERO);
> -	if (!mem->user_pages) {
> -		pr_err("%s: Failed to allocate pages array\n", __func__);
> -		ret = -ENOMEM;
> -		goto unregister_out;
> -	}
> -
> -	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
> +	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
>  	if (ret) {
>  		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
> -		goto free_out;
> +		goto unregister_out;
>  	}
>  
> -	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
> -
>  	ret = amdgpu_bo_reserve(bo, true);
>  	if (ret) {
>  		pr_err("%s: Failed to reserve BO\n", __func__);
> @@ -614,11 +598,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>  	amdgpu_bo_unreserve(bo);
>  
>  release_out:
> -	if (ret)
> -		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
> -free_out:
> -	kvfree(mem->user_pages);
> -	mem->user_pages = NULL;
> +	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>  unregister_out:
>  	if (ret)
>  		amdgpu_mn_unregister(bo);
> @@ -677,7 +657,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>  	ctx->kfd_bo.priority = 0;
>  	ctx->kfd_bo.tv.bo = &bo->tbo;
>  	ctx->kfd_bo.tv.num_shared = 1;
> -	ctx->kfd_bo.user_pages = NULL;
>  	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>  
>  	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
> @@ -741,7 +720,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
>  	ctx->kfd_bo.priority = 0;
>  	ctx->kfd_bo.tv.bo = &bo->tbo;
>  	ctx->kfd_bo.tv.num_shared = 1;
> -	ctx->kfd_bo.user_pages = NULL;
>  	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>  
>  	i = 0;
> @@ -1329,15 +1307,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>  	list_del(&bo_list_entry->head);
>  	mutex_unlock(&process_info->lock);
>  
> -	/* Free user pages if necessary */
> -	if (mem->user_pages) {
> -		pr_debug("%s: Freeing user_pages array\n", __func__);
> -		if (mem->user_pages[0])
> -			release_pages(mem->user_pages,
> -					mem->bo->tbo.ttm->num_pages);
> -		kvfree(mem->user_pages);
> -	}
> -
>  	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
>  	if (unlikely(ret))
>  		return ret;
> @@ -1751,25 +1720,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>  
>  		bo = mem->bo;
>  
> -		if (!mem->user_pages) {
> -			mem->user_pages =
> -				kvmalloc_array(bo->tbo.ttm->num_pages,
> -						 sizeof(struct page *),
> -						 GFP_KERNEL | __GFP_ZERO);
> -			if (!mem->user_pages) {
> -				pr_err("%s: Failed to allocate pages array\n",
> -				       __func__);
> -				return -ENOMEM;
> -			}
> -		} else if (mem->user_pages[0]) {
> -			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
> -		}
> -
>  		/* Get updated user pages */
>  		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
> -						   mem->user_pages);
> +						   bo->tbo.ttm->pages);
>  		if (ret) {
> -			mem->user_pages[0] = NULL;
> +			bo->tbo.ttm->pages[0] = NULL;
>  			pr_info("%s: Failed to get user pages: %d\n",
>  				__func__, ret);
>  			/* Pretend it succeeded. It will fail later
> @@ -1778,12 +1733,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>  			 * stalled user mode queues.
>  			 */
>  		}
> -
> -		/* Mark the BO as valid unless it was invalidated
> -		 * again concurrently
> -		 */
> -		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
> -			return -EAGAIN;

I think you're removing this to make sure we exactly pair up
amdgpu_ttm_tt_get_user_pages and amdgpu_ttm_tt_get_user_pages_done
calls. But there are other error conditions in
validate_invalid_user_pages that would skip the
amdgpu_ttm_tt_get_user_pages_done calls. That will need to be handled
properly in the error-handling code paths in validate_invalid_user_pages.
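Something along these lines would keep the calls paired, mirroring how
init_user_pages() in this patch already funnels failures through a label
before releasing the pages; the exact steps between the two calls are only
illustrative here, not taken from the posted code:

	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
	if (ret)
		return ret;

	ret = amdgpu_bo_reserve(bo, true);
	if (ret)
		goto out;	/* don't skip the _done() call below */

	amdgpu_bo_placement_from_domain(bo, mem->domain);
	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	amdgpu_bo_unreserve(bo);
out:
	/* always balance the earlier amdgpu_ttm_tt_get_user_pages() */
	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
	return ret;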


>  	}
>  
>  	return 0;
> @@ -1863,10 +1812,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>  
>  		bo = mem->bo;
>  
> -		/* Copy pages array and validate the BO if we got user pages */
> -		if (mem->user_pages[0]) {
> -			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
> -						     mem->user_pages);
> +		/* Validate the BO if we got user pages */
> +		if (bo->tbo.ttm->pages[0]) {
>  			amdgpu_bo_placement_from_domain(bo, mem->domain);
>  			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>  			if (ret) {
> @@ -1875,16 +1822,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>  			}
>  		}
>  
> -		/* Validate succeeded, now the BO owns the pages, free
> -		 * our copy of the pointer array. Put this BO back on
> -		 * the userptr_valid_list. If we need to revalidate
> -		 * it, we need to start from scratch.
> -		 */
> -		kvfree(mem->user_pages);
> -		mem->user_pages = NULL;
>  		list_move_tail(&mem->validate_list.head,
>  			       &process_info->userptr_valid_list);
>  
> +		/* Stop HMM track the userptr update. We dont check the return
> +		 * value for concurrent CPU page table update because we will
> +		 * reschedule the restore worker if process_info->evicted_bos
> +		 * is updated.
> +		 */
> +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> +
>  		/* Update mapping. If the BO was not validated
>  		 * (because we couldn't get user pages), this will
>  		 * clear the page table entries, which will result in
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index 5c79da8e1150..3842183970cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -200,8 +200,6 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>  
>  		if (!bo->parent)
>  			list_add_tail(&e->tv.head, &bucket[priority]);
> -
> -		e->user_pages = NULL;
>  	}
>  
>  	/* Connect the sorted buckets in the output list. */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index 7c5f5d1601e6..4beab2de09b3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -35,8 +35,7 @@ struct amdgpu_bo_list_entry {
>  	struct ttm_validate_buffer	tv;
>  	struct amdgpu_bo_va		*bo_va;
>  	uint32_t			priority;
> -	struct page			**user_pages;
> -	int				user_invalidated;
> +	bool				user_invalidated;
>  };
>  
>  struct amdgpu_bo_list {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
[snip]
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
[snip]
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 5d518d2bb9be..c8002ee1d415 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>  			true, false, MAX_SCHEDULE_TIMEOUT);
>  		if (r <= 0)
>  			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
> -
> -		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
>  	}
>  }
>  
> @@ -353,15 +351,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>  /**
>   * amdgpu_mn_get - create HMM mirror context
>   *
> + * @mm: the mm struct
>   * @adev: amdgpu device pointer
>   * @type: type of MMU notifier context
>   *
> - * Creates a HMM mirror context for current->mm.
> + * Creates a HMM mirror context for mm.
>   */
> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,

I think this change isn't needed any more. All callers of this function
specify mm as current->mm anyway.


> +				struct amdgpu_device *adev,
>  				enum amdgpu_mn_type type)
>  {
> -	struct mm_struct *mm = current->mm;
>  	struct amdgpu_mn *amn;
>  	unsigned long key = AMDGPU_MN_KEY(mm, type);
>  	int r;
> @@ -431,7 +430,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>  	struct list_head bos;
>  	struct interval_tree_node *it;
>  
> -	amn = amdgpu_mn_get(adev, type);
> +	amn = amdgpu_mn_get(current->mm, adev, type);
>  	if (IS_ERR(amn))
>  		return PTR_ERR(amn);
>  
> @@ -513,3 +512,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>  	mutex_unlock(&adev->mn_lock);
>  }
>  
> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */
> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
> +		(1 << 0), /* HMM_PFN_VALID */
> +		(1 << 1), /* HMM_PFN_WRITE */
> +		0 /* HMM_PFN_DEVICE_PRIVATE */
> +};
> +
> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
> +		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
> +		0, /* HMM_PFN_NONE */
> +		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
> +};
> +
> +void amdgpu_hmm_init_range(struct hmm_range *range)
> +{
> +	if (range) {
> +		range->flags = hmm_range_flags;
> +		range->values = hmm_range_values;
> +		range->pfn_shift = PAGE_SHIFT;
> +		range->pfns = NULL;
> +		INIT_LIST_HEAD(&range->list);
> +	}
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index 0a51fd00021c..4c5e86847a3b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -25,9 +25,10 @@
>  #define __AMDGPU_MN_H__
>  
>  /*
> - * MMU Notifier
> + * HMM mirror
>   */
>  struct amdgpu_mn;
> +struct hmm_range;
>  
>  enum amdgpu_mn_type {
>  	AMDGPU_MN_TYPE_GFX,
> @@ -37,10 +38,12 @@ enum amdgpu_mn_type {
>  #if defined(CONFIG_HMM_MIRROR)
>  void amdgpu_mn_lock(struct amdgpu_mn *mn);
>  void amdgpu_mn_unlock(struct amdgpu_mn *mn);
> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
> +				struct amdgpu_device *adev,
>  				enum amdgpu_mn_type type);
>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
>  void amdgpu_mn_unregister(struct amdgpu_bo *bo);
> +void amdgpu_hmm_init_range(struct hmm_range *range);
>  #else
>  static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
>  static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c91ec3101d00..33057d6114c7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -43,6 +43,7 @@
>  #include <linux/pagemap.h>
>  #include <linux/debugfs.h>
>  #include <linux/iommu.h>
> +#include <linux/hmm.h>
>  #include "amdgpu.h"
>  #include "amdgpu_object.h"
>  #include "amdgpu_trace.h"
> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>  /*
>   * TTM backend functions.
>   */
> -struct amdgpu_ttm_gup_task_list {
> -	struct list_head	list;
> -	struct task_struct	*task;
> -};
> -
>  struct amdgpu_ttm_tt {
>  	struct ttm_dma_tt	ttm;
>  	u64			offset;
> @@ -718,107 +714,105 @@ struct amdgpu_ttm_tt {
>  	uint32_t		userflags;
>  	spinlock_t              guptasklock;
>  	struct list_head        guptasks;
> -	atomic_t		mmu_invalidations;
> -	uint32_t		last_set_pages;
> +	struct hmm_range	range;
>  };
>  
>  /**
> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
> - * pointer to memory
> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
> + * memory and start HMM tracking CPU page table update
>   *
> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
> - * This provides a wrapper around the get_user_pages() call to provide
> - * device accessible pages that back user memory.
> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
> + * once afterwards to stop HMM tracking
>   */
>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>  	struct mm_struct *mm = gtt->usertask->mm;
> -	unsigned int flags = 0;
> -	unsigned pinned = 0;
> -	int r;
> +	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> +	struct hmm_range *range = &gtt->range;
> +	int r, i;
>  
>  	if (!mm) /* Happens during process shutdown */
>  		return -ESRCH;
>  
> -	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
> -		flags |= FOLL_WRITE;
> +	amdgpu_hmm_init_range(range);
>  
>  	down_read(&mm->mmap_sem);
>  
> +	range->vma = find_vma(mm, gtt->userptr);

There is still an assumption here that our userptr BOs cover only a
single VMA. This is a new limitation with HMM that the previous
get_user_pages-based implementation did not have. We're probably OK with
this for now. But we may run into regressions if any ROCm applications
register user memory spanning multiple VMAs. If that happens, we may need
to generalize this so that each userptr BO can have multiple HMM ranges.
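Should that generalization ever be needed, the lookup could walk every VMA
covering [gtt->userptr, end) and build one range per VMA. A rough sketch
only; ranges[], n and addr are made-up names, not part of this patch:

	struct vm_area_struct *vma;
	unsigned long addr = gtt->userptr;
	unsigned int n = 0;

	while (addr < end) {
		vma = find_vma(mm, addr);
		if (!vma || vma->vm_start > addr) {
			r = -EFAULT;	/* hole in the mapping */
			break;
		}
		/* one hmm_range per VMA, clipped to the BO range */
		ranges[n].vma   = vma;
		ranges[n].start = addr;
		ranges[n].end   = min(vma->vm_end, end);
		n++;
		addr = vma->vm_end;
	}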


>  	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>  		/*
>  		 * check that we only use anonymous memory to prevent problems
>  		 * with writeback
>  		 */
> -		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> -		struct vm_area_struct *vma;
> +		struct vm_area_struct *vma = range->vma;
>  
> -		vma = find_vma(mm, gtt->userptr);
> -		if (!vma || vma->vm_file || vma->vm_end < end) {
> -			up_read(&mm->mmap_sem);
> -			return -EPERM;
> -		}
> +		if (!vma || vma->vm_file || vma->vm_end < end)
> +			range->vma = NULL;
>  	}
>  
> -	/* loop enough times using contiguous pages of memory */
> -	do {
> -		unsigned num_pages = ttm->num_pages - pinned;
> -		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
> -		struct page **p = pages + pinned;
> -		struct amdgpu_ttm_gup_task_list guptask;
> -
> -		guptask.task = current;
> -		spin_lock(&gtt->guptasklock);
> -		list_add(&guptask.list, &gtt->guptasks);
> -		spin_unlock(&gtt->guptasklock);
> +	if (!range->vma) {
> +		r = -EPERM;

You're treating permissions errors and errors finding a VMA the same
way. And I think you don't detect failures to find a VMA properly in the
first place. find_vma may return a VMA that doesn't include the address
provided. It's documented as "Look up the first VMA which satisfies 
addr < vm_end,  NULL if none." You should also check that the VMA covers
the entire address range of the userptr BO. There is a helper function
in linux/mm.h that does that: range_in_vma(range->vma, gtt->userptr,
gtt->userptr + ttm->num_pages * PAGE_SIZE). You should
check this condition in all cases.

There should be two distinct types of errors. If find_vma fails to find
a matching VMA, that should be -EFAULT. If the gtt->userflags don't
match the VMA properties, that should be -EPERM.
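A sketch of what those checks could look like with the two error codes
separated (based on the suggestion above, reusing the r, end and out label
already present in the patched function; not the posted code):

	vma = find_vma(mm, gtt->userptr);
	if (!vma || !range_in_vma(vma, gtt->userptr, end)) {
		r = -EFAULT;	/* no single VMA covers the whole range */
		goto out;
	}
	if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && vma->vm_file) {
		r = -EPERM;	/* VMA properties don't match the userptr flags */
		goto out;
	}
	range->vma = vma;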


> +		goto out;
> +	}
>  
> -		if (mm == current->mm)
> -			r = get_user_pages(userptr, num_pages, flags, p, NULL);
> -		else
> -			r = get_user_pages_remote(gtt->usertask,
> -					mm, userptr, num_pages,
> -					flags, p, NULL, NULL);
> +	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> +				     GFP_KERNEL);
> +	if (range->pfns == NULL) {
> +		r = -ENOMEM;
> +		goto out;
> +	}
> +	range->start = gtt->userptr;
> +	range->end = end;
>  
> -		spin_lock(&gtt->guptasklock);
> -		list_del(&guptask.list);
> -		spin_unlock(&gtt->guptasklock);
> +	for (i = 0; i < ttm->num_pages; i++) {
> +		range->pfns[i] = range->flags[HMM_PFN_VALID];
> +		range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> +					0 : range->flags[HMM_PFN_WRITE];
> +	}

You could optimize this slightly. You don't need to call
amdgpu_ttm_tt_is_readonly in each loop iteration. The flags are the same
for all pfns. So do the conditional thing on range->pfns[0], and then
just copy that to all the remaining pfns.
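One way to do that, computing the common flags once in a local instead of
via pfns[0] (equivalent in effect; pfn_flags is an assumed local name):

	uint64_t pfn_flags = range->flags[HMM_PFN_VALID];

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		pfn_flags |= range->flags[HMM_PFN_WRITE];

	for (i = 0; i < ttm->num_pages; i++)
		range->pfns[i] = pfn_flags;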


>  
> -		if (r < 0)
> -			goto release_pages;
> +	/* This may trigger page table update */
> +	r = hmm_vma_fault(range, true);
> +	if (r)
> +		goto out_free_pfns;
>  
> -		pinned += r;
> +	up_read(&mm->mmap_sem);
>  
> -	} while (pinned < ttm->num_pages);
> +	for (i = 0; i < ttm->num_pages; i++)
> +		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
>  
> -	up_read(&mm->mmap_sem);
>  	return 0;
>  
> -release_pages:
> -	release_pages(pages, pinned);
> +out_free_pfns:
> +	kvfree(range->pfns);
> +	range->pfns = NULL;
> +out:
>  	up_read(&mm->mmap_sem);
>  	return r;
>  }
>  
>  /**
> - * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
> + * Check if the pages backing this ttm range have been invalidated
>   *
> - * Called by amdgpu_cs_list_validate(). This creates the page list
> - * that backs user memory and will ultimately be mapped into the device
> - * address space.
> + * Returns: true if pages are invalidated since the last time they've been set

I still don't like that you're inverting the return value of
hmm_vma_range_done. This will get confusing in the future. Could this
function instead return "true if pages are still valid"?


>   */
> -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	unsigned i;
> +	bool r;
>  
> -	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
> -	for (i = 0; i < ttm->num_pages; ++i) {
> -		if (ttm->pages[i])
> -			put_page(ttm->pages[i]);
> +	if (gtt == NULL || !gtt->userptr)
> +		return false;

Use "!gtt" instead of "gtt == NULL". Also, returning "false" here means
the pages are still valid. That seems like the wrong answer for this
kind of failure. "false" would make sense here if you followed my
suggestion above to return true, if the pages are still valid.
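With that convention the function could read roughly as follows (a sketch,
assuming hmm_vma_range_done() keeps returning true while the snapshot is
still valid, as the current patch implies):

bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	bool valid;

	if (!gtt || !gtt->userptr)
		return false;

	valid = hmm_vma_range_done(&gtt->range);

	kvfree(gtt->range.pfns);	/* kvfree(NULL) is a no-op */
	gtt->range.pfns = NULL;

	return valid;
}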


>  
> -		ttm->pages[i] = pages ? pages[i] : NULL;
> +	r = hmm_vma_range_done(&gtt->range);
> +
> +	if (gtt->range.pfns) {
> +		kvfree(gtt->range.pfns);
> +		gtt->range.pfns = NULL;
>  	}
> +
> +	return !r;

I still don't like that you're inverting the return value of
hmm_vma_range_done. This will get confusing in the future.

Regards,
  Felix


>  }
>  
>  /**
> @@ -903,9 +897,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
>  	/* unmap the pages mapped to the device */
>  	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
>  
> -	/* mark the pages as dirty */
> -	amdgpu_ttm_tt_mark_user_pages(ttm);
> -
>  	sg_free_table(ttm->sg);
>  }
>  
> @@ -1207,7 +1198,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
>  	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
>  
>  	if (gtt && gtt->userptr) {
> -		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
> +		memset(ttm->pages, 0, ttm->num_pages * sizeof(struct page *));
>  		kfree(ttm->sg);
>  		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
>  		return;
> @@ -1258,8 +1249,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>  
>  	spin_lock_init(&gtt->guptasklock);
>  	INIT_LIST_HEAD(&gtt->guptasks);
> -	atomic_set(&gtt->mmu_invalidations, 0);
> -	gtt->last_set_pages = 0;
>  
>  	return 0;
>  }
> @@ -1289,7 +1278,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>  				  unsigned long end)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	struct amdgpu_ttm_gup_task_list *entry;
>  	unsigned long size;
>  
>  	if (gtt == NULL || !gtt->userptr)
> @@ -1302,48 +1290,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>  	if (gtt->userptr > end || gtt->userptr + size <= start)
>  		return false;
>  
> -	/* Search the lists of tasks that hold this mapping and see
> -	 * if current is one of them.  If it is return false.
> -	 */
> -	spin_lock(&gtt->guptasklock);
> -	list_for_each_entry(entry, &gtt->guptasks, list) {
> -		if (entry->task == current) {
> -			spin_unlock(&gtt->guptasklock);
> -			return false;
> -		}
> -	}
> -	spin_unlock(&gtt->guptasklock);
> -
> -	atomic_inc(&gtt->mmu_invalidations);
> -
>  	return true;
>  }
>  
>  /**
> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
>   */
> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
> -				       int *last_invalidated)
> -{
> -	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> -	int prev_invalidated = *last_invalidated;
> -
> -	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
> -	return prev_invalidated != *last_invalidated;
> -}
> -
> -/**
> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
> - * been invalidated since the last time they've been set?
> - */
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
>  {
>  	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>  
>  	if (gtt == NULL || !gtt->userptr)
>  		return false;
>  
> -	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
> +	return true;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index b5b2d101f7db..c5a1e78a09b1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -102,7 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>  int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>  
>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
> -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
>  void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
>  int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>  				     uint32_t flags);
> @@ -112,7 +112,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>  				  unsigned long end);
>  bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>  				       int *last_invalidated);
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
>  bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
>  uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
>  uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index f059ec314f2b..ab8fa84b27a6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -619,7 +619,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>  	entry->tv.bo = &vm->root.base.bo->tbo;
>  	/* One for the VM updates, one for TTM and one for the CS job */
>  	entry->tv.num_shared = 3;
> -	entry->user_pages = NULL;
>  	list_add(&entry->tv.head, validated);
>  }
>  
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v3
       [not found]         ` <b5ff9fa4-97c9-4524-d4dd-34c8003274fa-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-11  8:27           ` Christian König
  2018-12-13 20:56           ` Yang, Philip
  1 sibling, 0 replies; 13+ messages in thread
From: Christian König @ 2018-12-11  8:27 UTC (permalink / raw)
  To: Kuehling, Felix, Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 11.12.18 at 01:12, Kuehling, Felix wrote:
> This is a nice improvement from the last version. I still see some
> potential problems. See inline ...
>
> I'm skipping over the CS and GEM parts. I hope Christian can review
> those parts.

Going to do this as soon as you have sorted out all the issues with the 
KFD implementation.

So please ping me when you think that there are no more issues left there.

Thanks,
Christian.

> [snip]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v3
       [not found]         ` <b5ff9fa4-97c9-4524-d4dd-34c8003274fa-5C7GfCeVMHo@public.gmane.org>
  2018-12-11  8:27           ` Christian König
@ 2018-12-13 20:56           ` Yang, Philip
  1 sibling, 0 replies; 13+ messages in thread
From: Yang, Philip @ 2018-12-13 20:56 UTC (permalink / raw)
  To: Kuehling, Felix, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 2018-12-10 7:12 p.m., Kuehling, Felix wrote:
> This is a nice improvement from the last version. I still see some
> potential problems. See inline ...
>
> I'm skipping over the CS and GEM parts. I hope Christian can review
> those parts.
>
> On 2018-12-06 4:02 p.m., Yang, Philip wrote:
>> Use the HMM helper function hmm_vma_fault() to get the physical pages
>> backing userptr and start tracking CPU page table updates on those
>> pages. Then use hmm_vma_range_done() to check whether those pages were
>> updated before amdgpu_cs_submit for gfx, or before user queues are
>> resumed for kfd.
>>
>> If the userptr pages were updated, amdgpu_cs_ioctl restarts from
>> scratch for gfx, and the restore worker is rescheduled to retry for kfd.
>>
>> HMM simplifies the concurrent CPU page table update check, so remove
>> the guptasklock, mmu_invalidations and last_set_pages fields from
>> struct amdgpu_ttm_tt.
>>
>> HMM does not pin the pages (does not increase the page refcount), so
>> remove the related operations like release_pages(), put_page() and
>> mark_page_dirty().
>>
>> Change-Id: Id2d3130378b44a774e0d77156102a20a203b5036
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |   1 -
>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |  81 ++------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 188 +++++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  34 +++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   7 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 160 ++++++---------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   4 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   1 -
>>   11 files changed, 206 insertions(+), 289 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> index bcf587b4ba98..b492dd9e541a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>> @@ -62,7 +62,6 @@ struct kgd_mem {
>>   
>>   	atomic_t invalid;
>>   	struct amdkfd_process_info *process_info;
>> -	struct page **user_pages;
>>   
>>   	struct amdgpu_sync sync;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index b29ef088fa14..75833b40507b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -580,28 +580,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>>   		goto out;
>>   	}
>>   
>> -	/* If no restore worker is running concurrently, user_pages
>> -	 * should not be allocated
>> -	 */
>> -	WARN(mem->user_pages, "Leaking user_pages array");
>> -
>> -	mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
>> -					   sizeof(struct page *),
>> -					   GFP_KERNEL | __GFP_ZERO);
>> -	if (!mem->user_pages) {
>> -		pr_err("%s: Failed to allocate pages array\n", __func__);
>> -		ret = -ENOMEM;
>> -		goto unregister_out;
>> -	}
>> -
>> -	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
>> +	ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
>>   	if (ret) {
>>   		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
>> -		goto free_out;
>> +		goto unregister_out;
>>   	}
>>   
>> -	amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
>> -
>>   	ret = amdgpu_bo_reserve(bo, true);
>>   	if (ret) {
>>   		pr_err("%s: Failed to reserve BO\n", __func__);
>> @@ -614,11 +598,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>>   	amdgpu_bo_unreserve(bo);
>>   
>>   release_out:
>> -	if (ret)
>> -		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>> -free_out:
>> -	kvfree(mem->user_pages);
>> -	mem->user_pages = NULL;
>> +	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>   unregister_out:
>>   	if (ret)
>>   		amdgpu_mn_unregister(bo);
>> @@ -677,7 +657,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>>   	ctx->kfd_bo.priority = 0;
>>   	ctx->kfd_bo.tv.bo = &bo->tbo;
>>   	ctx->kfd_bo.tv.num_shared = 1;
>> -	ctx->kfd_bo.user_pages = NULL;
>>   	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>   
>>   	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
>> @@ -741,7 +720,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
>>   	ctx->kfd_bo.priority = 0;
>>   	ctx->kfd_bo.tv.bo = &bo->tbo;
>>   	ctx->kfd_bo.tv.num_shared = 1;
>> -	ctx->kfd_bo.user_pages = NULL;
>>   	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>   
>>   	i = 0;
>> @@ -1329,15 +1307,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>   	list_del(&bo_list_entry->head);
>>   	mutex_unlock(&process_info->lock);
>>   
>> -	/* Free user pages if necessary */
>> -	if (mem->user_pages) {
>> -		pr_debug("%s: Freeing user_pages array\n", __func__);
>> -		if (mem->user_pages[0])
>> -			release_pages(mem->user_pages,
>> -					mem->bo->tbo.ttm->num_pages);
>> -		kvfree(mem->user_pages);
>> -	}
>> -
>>   	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
>>   	if (unlikely(ret))
>>   		return ret;
>> @@ -1751,25 +1720,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>>   
>>   		bo = mem->bo;
>>   
>> -		if (!mem->user_pages) {
>> -			mem->user_pages =
>> -				kvmalloc_array(bo->tbo.ttm->num_pages,
>> -						 sizeof(struct page *),
>> -						 GFP_KERNEL | __GFP_ZERO);
>> -			if (!mem->user_pages) {
>> -				pr_err("%s: Failed to allocate pages array\n",
>> -				       __func__);
>> -				return -ENOMEM;
>> -			}
>> -		} else if (mem->user_pages[0]) {
>> -			release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>> -		}
>> -
>>   		/* Get updated user pages */
>>   		ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
>> -						   mem->user_pages);
>> +						   bo->tbo.ttm->pages);
>>   		if (ret) {
>> -			mem->user_pages[0] = NULL;
>> +			bo->tbo.ttm->pages[0] = NULL;
>>   			pr_info("%s: Failed to get user pages: %d\n",
>>   				__func__, ret);
>>   			/* Pretend it succeeded. It will fail later
>> @@ -1778,12 +1733,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>>   			 * stalled user mode queues.
>>   			 */
>>   		}
>> -
>> -		/* Mark the BO as valid unless it was invalidated
>> -		 * again concurrently
>> -		 */
>> -		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
>> -			return -EAGAIN;
> I think you're removing this to make sure we exactly pair up
> amdgpu_ttm_tt_get_user_pages and amdgpu_ttm_tt_get_user_pages_done
> calls. But there are other error conditions in
> validate_invalid_user_pages that would skip the
> amdgpu_ttm_tt_get_user_pages_done calls. That will need to be handled
> properly in the error-handling code paths in validate_invalid_user_pages.
>
Yes. At the end of validate_invalid_user_pages, I add a loop that calls
amdgpu_ttm_tt_get_user_pages_done for every BO still on
process_info->userptr_inval_list; this covers all the error-handling paths.
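Roughly like this (a sketch only, the label name is made up):

unreserve_out:
	/* Pair up every amdgpu_ttm_tt_get_user_pages() with a _done call,
	 * including on the error paths that bail out early. BOs that were
	 * validated successfully have already been moved to the valid list.
	 */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			    validate_list.head)
		amdgpu_ttm_tt_get_user_pages_done(mem->bo->tbo.ttm);

	return ret;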
>>   	}
>>   
>>   	return 0;
>> @@ -1863,10 +1812,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>>   
>>   		bo = mem->bo;
>>   
>> -		/* Copy pages array and validate the BO if we got user pages */
>> -		if (mem->user_pages[0]) {
>> -			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
>> -						     mem->user_pages);
>> +		/* Validate the BO if we got user pages */
>> +		if (bo->tbo.ttm->pages[0]) {
>>   			amdgpu_bo_placement_from_domain(bo, mem->domain);
>>   			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>   			if (ret) {
>> @@ -1875,16 +1822,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>>   			}
>>   		}
>>   
>> -		/* Validate succeeded, now the BO owns the pages, free
>> -		 * our copy of the pointer array. Put this BO back on
>> -		 * the userptr_valid_list. If we need to revalidate
>> -		 * it, we need to start from scratch.
>> -		 */
>> -		kvfree(mem->user_pages);
>> -		mem->user_pages = NULL;
>>   		list_move_tail(&mem->validate_list.head,
>>   			       &process_info->userptr_valid_list);
>>   
>> +		/* Stop HMM track the userptr update. We dont check the return
>> +		 * value for concurrent CPU page table update because we will
>> +		 * reschedule the restore worker if process_info->evicted_bos
>> +		 * is updated.
>> +		 */
>> +		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>> +
>>   		/* Update mapping. If the BO was not validated
>>   		 * (because we couldn't get user pages), this will
>>   		 * clear the page table entries, which will result in
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> index 5c79da8e1150..3842183970cc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> @@ -200,8 +200,6 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>>   
>>   		if (!bo->parent)
>>   			list_add_tail(&e->tv.head, &bucket[priority]);
>> -
>> -		e->user_pages = NULL;
>>   	}
>>   
>>   	/* Connect the sorted buckets in the output list. */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> index 7c5f5d1601e6..4beab2de09b3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>> @@ -35,8 +35,7 @@ struct amdgpu_bo_list_entry {
>>   	struct ttm_validate_buffer	tv;
>>   	struct amdgpu_bo_va		*bo_va;
>>   	uint32_t			priority;
>> -	struct page			**user_pages;
>> -	int				user_invalidated;
>> +	bool				user_invalidated;
>>   };
>>   
>>   struct amdgpu_bo_list {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> [snip]
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> [snip]
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index 5d518d2bb9be..c8002ee1d415 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>>   			true, false, MAX_SCHEDULE_TIMEOUT);
>>   		if (r <= 0)
>>   			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
>> -
>> -		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
>>   	}
>>   }
>>   
>> @@ -353,15 +351,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>   /**
>>    * amdgpu_mn_get - create HMM mirror context
>>    *
>> + * @mm: the mm struct
>>    * @adev: amdgpu device pointer
>>    * @type: type of MMU notifier context
>>    *
>> - * Creates a HMM mirror context for current->mm.
>> + * Creates a HMM mirror context for mm.
>>    */
>> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
> I think this change isn't needed any more. All callers of this function
> specify mm as current->mm anyway.
>
Yes, removed this change.
>> +				struct amdgpu_device *adev,
>>   				enum amdgpu_mn_type type)
>>   {
>> -	struct mm_struct *mm = current->mm;
>>   	struct amdgpu_mn *amn;
>>   	unsigned long key = AMDGPU_MN_KEY(mm, type);
>>   	int r;
>> @@ -431,7 +430,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>   	struct list_head bos;
>>   	struct interval_tree_node *it;
>>   
>> -	amn = amdgpu_mn_get(adev, type);
>> +	amn = amdgpu_mn_get(current->mm, adev, type);
>>   	if (IS_ERR(amn))
>>   		return PTR_ERR(amn);
>>   
>> @@ -513,3 +512,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>   	mutex_unlock(&adev->mn_lock);
>>   }
>>   
>> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */
>> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
>> +		(1 << 0), /* HMM_PFN_VALID */
>> +		(1 << 1), /* HMM_PFN_WRITE */
>> +		0 /* HMM_PFN_DEVICE_PRIVATE */
>> +};
>> +
>> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
>> +		0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
>> +		0, /* HMM_PFN_NONE */
>> +		0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
>> +};
>> +
>> +void amdgpu_hmm_init_range(struct hmm_range *range)
>> +{
>> +	if (range) {
>> +		range->flags = hmm_range_flags;
>> +		range->values = hmm_range_values;
>> +		range->pfn_shift = PAGE_SHIFT;
>> +		range->pfns = NULL;
>> +		INIT_LIST_HEAD(&range->list);
>> +	}
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index 0a51fd00021c..4c5e86847a3b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -25,9 +25,10 @@
>>   #define __AMDGPU_MN_H__
>>   
>>   /*
>> - * MMU Notifier
>> + * HMM mirror
>>    */
>>   struct amdgpu_mn;
>> +struct hmm_range;
>>   
>>   enum amdgpu_mn_type {
>>   	AMDGPU_MN_TYPE_GFX,
>> @@ -37,10 +38,12 @@ enum amdgpu_mn_type {
>>   #if defined(CONFIG_HMM_MIRROR)
>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
>> +				struct amdgpu_device *adev,
>>   				enum amdgpu_mn_type type);
>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo);
>> +void amdgpu_hmm_init_range(struct hmm_range *range);
>>   #else
>>   static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
>>   static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c91ec3101d00..33057d6114c7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -43,6 +43,7 @@
>>   #include <linux/pagemap.h>
>>   #include <linux/debugfs.h>
>>   #include <linux/iommu.h>
>> +#include <linux/hmm.h>
>>   #include "amdgpu.h"
>>   #include "amdgpu_object.h"
>>   #include "amdgpu_trace.h"
>> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>>   /*
>>    * TTM backend functions.
>>    */
>> -struct amdgpu_ttm_gup_task_list {
>> -	struct list_head	list;
>> -	struct task_struct	*task;
>> -};
>> -
>>   struct amdgpu_ttm_tt {
>>   	struct ttm_dma_tt	ttm;
>>   	u64			offset;
>> @@ -718,107 +714,105 @@ struct amdgpu_ttm_tt {
>>   	uint32_t		userflags;
>>   	spinlock_t              guptasklock;
>>   	struct list_head        guptasks;
>> -	atomic_t		mmu_invalidations;
>> -	uint32_t		last_set_pages;
>> +	struct hmm_range	range;
>>   };
>>   
>>   /**
>> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
>> - * pointer to memory
>> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
>> + * memory and start HMM tracking CPU page table update
>>    *
>> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
>> - * This provides a wrapper around the get_user_pages() call to provide
>> - * device accessible pages that back user memory.
>> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
>> + * once afterwards to stop HMM tracking
>>    */
>>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>   {
>>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>   	struct mm_struct *mm = gtt->usertask->mm;
>> -	unsigned int flags = 0;
>> -	unsigned pinned = 0;
>> -	int r;
>> +	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>> +	struct hmm_range *range = &gtt->range;
>> +	int r, i;
>>   
>>   	if (!mm) /* Happens during process shutdown */
>>   		return -ESRCH;
>>   
>> -	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
>> -		flags |= FOLL_WRITE;
>> +	amdgpu_hmm_init_range(range);
>>   
>>   	down_read(&mm->mmap_sem);
>>   
>> +	range->vma = find_vma(mm, gtt->userptr);
> There is still an assumption here that our userptr BOs cover only a
> single VMA. This is a new limitation with HMM that the previous
> get_user_pages-based implementation did not have. We're probably OK with
> this for now. But we may run into regressions if there are any ROCm
> applications that register user memory spanning multiple VMAs. If that
> happens, we may need to generalize this so that each userptr BO can have
> multiple HMM ranges.
ok, thanks.
>
>>   	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>>   		/*
>>   		 * check that we only use anonymous memory to prevent problems
>>   		 * with writeback
>>   		 */
>> -		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>> -		struct vm_area_struct *vma;
>> +		struct vm_area_struct *vma = range->vma;
>>   
>> -		vma = find_vma(mm, gtt->userptr);
>> -		if (!vma || vma->vm_file || vma->vm_end < end) {
>> -			up_read(&mm->mmap_sem);
>> -			return -EPERM;
>> -		}
>> +		if (!vma || vma->vm_file || vma->vm_end < end)
>> +			range->vma = NULL;
>>   	}
>>   
>> -	/* loop enough times using contiguous pages of memory */
>> -	do {
>> -		unsigned num_pages = ttm->num_pages - pinned;
>> -		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
>> -		struct page **p = pages + pinned;
>> -		struct amdgpu_ttm_gup_task_list guptask;
>> -
>> -		guptask.task = current;
>> -		spin_lock(&gtt->guptasklock);
>> -		list_add(&guptask.list, &gtt->guptasks);
>> -		spin_unlock(&gtt->guptasklock);
>> +	if (!range->vma) {
>> +		r = -EPERM;
> You're treating permissions errors and errors finding a VMA the same
> way. And I think you don't detect failures to find a VMA properly in the
> first place. find_vma may return a VMA that doesn't include the address
> provided. It's documented as "Look up the first VMA which satisfies
> addr < vm_end,  NULL if none." You should also check that the VMA covers
> the entire address range of the userptr BO. There is a helper function
> in linux/mm.h that does that: range_in_vma(range->vma, gtt->userptr,
> gtt->userptr + ttm->num_pages * PAGE_SIZE). You should
> check this condition in all cases.
>
> There should be two distinct types of errors. If find_vma fails to find
> a matching VMA, that should be -EFAULT. If the gtt->userflags don't
> match the VMA properties, that should be -EPERM.
>
Fixed, I added the range_in_vma check for both cases.
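Something along these lines (just a sketch; the local variable and exact
structure in the updated patch may differ):

	struct vm_area_struct *vma;

	vma = find_vma(mm, gtt->userptr);
	if (unlikely(!vma || !range_in_vma(vma, gtt->userptr, end))) {
		/* No single VMA covering the whole userptr range */
		r = -EFAULT;
		goto out;
	}
	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
		     vma->vm_file)) {
		/* VMA properties don't match the userptr flags */
		r = -EPERM;
		goto out;
	}
	range->vma = vma;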
>> +		goto out;
>> +	}
>>   
>> -		if (mm == current->mm)
>> -			r = get_user_pages(userptr, num_pages, flags, p, NULL);
>> -		else
>> -			r = get_user_pages_remote(gtt->usertask,
>> -					mm, userptr, num_pages,
>> -					flags, p, NULL, NULL);
>> +	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
>> +				     GFP_KERNEL);
>> +	if (range->pfns == NULL) {
>> +		r = -ENOMEM;
>> +		goto out;
>> +	}
>> +	range->start = gtt->userptr;
>> +	range->end = end;
>>   
>> -		spin_lock(&gtt->guptasklock);
>> -		list_del(&guptask.list);
>> -		spin_unlock(&gtt->guptasklock);
>> +	for (i = 0; i < ttm->num_pages; i++) {
>> +		range->pfns[i] = range->flags[HMM_PFN_VALID];
>> +		range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
>> +					0 : range->flags[HMM_PFN_WRITE];
>> +	}
> You could optimize this slightly. You don't need to call
> amdgpu_ttm_tt_is_readonly in each loop iteration. The flags are the same
> for all pfns. So do the conditional thing on range->pfns[0], and then
> just copy that to all the remaining pfns.
>
Done, thanks.
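i.e. something like this (sketch, the variable name is made up):

	uint64_t pfns_flags = range->flags[HMM_PFN_VALID];

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		pfns_flags |= range->flags[HMM_PFN_WRITE];

	for (i = 0; i < ttm->num_pages; i++)
		range->pfns[i] = pfns_flags;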
>>   
>> -		if (r < 0)
>> -			goto release_pages;
>> +	/* This may trigger page table update */
>> +	r = hmm_vma_fault(range, true);
>> +	if (r)
>> +		goto out_free_pfns;
>>   
>> -		pinned += r;
>> +	up_read(&mm->mmap_sem);
>>   
>> -	} while (pinned < ttm->num_pages);
>> +	for (i = 0; i < ttm->num_pages; i++)
>> +		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
>>   
>> -	up_read(&mm->mmap_sem);
>>   	return 0;
>>   
>> -release_pages:
>> -	release_pages(pages, pinned);
>> +out_free_pfns:
>> +	kvfree(range->pfns);
>> +	range->pfns = NULL;
>> +out:
>>   	up_read(&mm->mmap_sem);
>>   	return r;
>>   }
>>   
>>   /**
>> - * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
>> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
>> + * Check if the pages backing this ttm range have been invalidated
>>    *
>> - * Called by amdgpu_cs_list_validate(). This creates the page list
>> - * that backs user memory and will ultimately be mapped into the device
>> - * address space.
>> + * Returns: true if pages are invalidated since the last time they've been set
> I still don't like that you're inverting the return value of
> hmm_vma_range_done. This will get confusing in the future. Could this
> function instead return "true if pages are still valid"?
>
Done, return true if pages are still valid
>>    */
>> -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
>>   {
>>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> -	unsigned i;
>> +	bool r;
>>   
>> -	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
>> -	for (i = 0; i < ttm->num_pages; ++i) {
>> -		if (ttm->pages[i])
>> -			put_page(ttm->pages[i]);
>> +	if (gtt == NULL || !gtt->userptr)
>> +		return false;
> Use "!gtt" instead of "gtt == NULL". Also, returning "false" here means
> the pages are still valid. That seems like the wrong answer for this
> kind of failure. "false" would make sense here if you followed my
> suggestion above to return true, if the pages are still valid.
Done, thanks.
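With those two changes the helper ends up looking roughly like this (sketch):

bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	bool range_valid;

	if (!gtt || !gtt->userptr)
		return false;

	/* hmm_vma_range_done() returns true if the CPU page table was not
	 * changed since amdgpu_ttm_tt_get_user_pages(), i.e. the pages are
	 * still valid, so pass that through without inverting it.
	 */
	range_valid = hmm_vma_range_done(&gtt->range);

	kvfree(gtt->range.pfns);
	gtt->range.pfns = NULL;

	return range_valid;
}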
>
>>   
>> -		ttm->pages[i] = pages ? pages[i] : NULL;
>> +	r = hmm_vma_range_done(&gtt->range);
>> +
>> +	if (gtt->range.pfns) {
>> +		kvfree(gtt->range.pfns);
>> +		gtt->range.pfns = NULL;
>>   	}
>> +
>> +	return !r;
> I still don't like that you're inverting the return value of
> hmm_vma_range_done. This will get confusing in the future.
>
> Regards,
>    Felix
>
>
>>   }
>>   
>>   /**
>> @@ -903,9 +897,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
>>   	/* unmap the pages mapped to the device */
>>   	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
>>   
>> -	/* mark the pages as dirty */
>> -	amdgpu_ttm_tt_mark_user_pages(ttm);
>> -
>>   	sg_free_table(ttm->sg);
>>   }
>>   
>> @@ -1207,7 +1198,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
>>   	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
>>   
>>   	if (gtt && gtt->userptr) {
>> -		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
>> +		memset(ttm->pages, 0, ttm->num_pages * sizeof(struct page *));
>>   		kfree(ttm->sg);
>>   		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
>>   		return;
>> @@ -1258,8 +1249,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>   
>>   	spin_lock_init(&gtt->guptasklock);
>>   	INIT_LIST_HEAD(&gtt->guptasks);
>> -	atomic_set(&gtt->mmu_invalidations, 0);
>> -	gtt->last_set_pages = 0;
>>   
>>   	return 0;
>>   }
>> @@ -1289,7 +1278,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>>   				  unsigned long end)
>>   {
>>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> -	struct amdgpu_ttm_gup_task_list *entry;
>>   	unsigned long size;
>>   
>>   	if (gtt == NULL || !gtt->userptr)
>> @@ -1302,48 +1290,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>>   	if (gtt->userptr > end || gtt->userptr + size <= start)
>>   		return false;
>>   
>> -	/* Search the lists of tasks that hold this mapping and see
>> -	 * if current is one of them.  If it is return false.
>> -	 */
>> -	spin_lock(&gtt->guptasklock);
>> -	list_for_each_entry(entry, &gtt->guptasks, list) {
>> -		if (entry->task == current) {
>> -			spin_unlock(&gtt->guptasklock);
>> -			return false;
>> -		}
>> -	}
>> -	spin_unlock(&gtt->guptasklock);
>> -
>> -	atomic_inc(&gtt->mmu_invalidations);
>> -
>>   	return true;
>>   }
>>   
>>   /**
>> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
>> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
>>    */
>> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>> -				       int *last_invalidated)
>> -{
>> -	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> -	int prev_invalidated = *last_invalidated;
>> -
>> -	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
>> -	return prev_invalidated != *last_invalidated;
>> -}
>> -
>> -/**
>> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
>> - * been invalidated since the last time they've been set?
>> - */
>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
>>   {
>>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>   
>>   	if (gtt == NULL || !gtt->userptr)
>>   		return false;
>>   
>> -	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
>> +	return true;
>>   }
>>   
>>   /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index b5b2d101f7db..c5a1e78a09b1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -102,7 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>>   int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>>   
>>   int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
>> -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
>>   void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
>>   int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>   				     uint32_t flags);
>> @@ -112,7 +112,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>>   				  unsigned long end);
>>   bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>>   				       int *last_invalidated);
>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
>>   bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
>>   uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
>>   uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index f059ec314f2b..ab8fa84b27a6 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -619,7 +619,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
>>   	entry->tv.bo = &vm->root.base.bo->tbo;
>>   	/* One for the VM updates, one for TTM and one for the CS job */
>>   	entry->tv.num_shared = 3;
>> -	entry->user_pages = NULL;
>>   	list_add(&entry->tv.head, validated);
>>   }
>>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
       [not found]     ` <BY1PR12MB0502882984A59380E5F17F66B4AA0-PicGAnIBOobrCwm+z9iKNgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-12-13 21:15       ` Yang, Philip
  0 siblings, 0 replies; 13+ messages in thread
From: Yang, Philip @ 2018-12-13 21:15 UTC (permalink / raw)
  To: Zhou, David(ChunMing), amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

I thought about renaming the file to amdgpu_hmm.c/h and then changing the
data structures, variables and function names from amdgpu_mn_* to
amdgpu_hmm_*, but that seemed like too many changes, so I gave up on it.

Philip

On 2018-12-07 7:00 a.m., Zhou, David(ChunMing) wrote:
> You should even rename amdgpu_mn.c/h to amdgpu_hmm.c/h.
>
> -David
>
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Yang,
>> Philip
>> Sent: Friday, December 07, 2018 5:03 AM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Yang, Philip <Philip.Yang@amd.com>
>> Subject: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu
>> notifier v6
>>
>> Replace our MMU notifier with
>> hmm_mirror_ops.sync_cpu_device_pagetables
>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a
>> dependency in DRM_AMDGPU_USERPTR Kconfig.
>>
>> It supports both KFD userptr and gfx userptr paths.
>>
>> The dependent HMM patches from Jérôme Glisse are all merged into the
>> 4.20.0 kernel now.
>>
>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++++++++++-------------
>> --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>   4 files changed, 55 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 9221e5489069..960a63355705 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK  config
>> DRM_AMDGPU_USERPTR
>>   	bool "Always enable userptr write support"
>>   	depends on DRM_AMDGPU
>> -	select MMU_NOTIFIER
>> +	select HMM_MIRROR
>>   	help
>> -	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
>> -	  selected to enabled full userptr support.
>> +	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>> +	  isn't already selected to enabled full userptr support.
>>
>>   config DRM_AMDGPU_GART_DEBUGFS
>>   	bool "Allow GART access through debugfs"
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index f76bcb9c45e4..675efc850ff4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -172,7 +172,7 @@ endif
>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>>
>>   include $(FULL_AMD_PATH)/powerplay/Makefile
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index e55508b39496..5d518d2bb9be 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -45,7 +45,7 @@
>>
>>   #include <linux/firmware.h>
>>   #include <linux/module.h>
>> -#include <linux/mmu_notifier.h>
>> +#include <linux/hmm.h>
>>   #include <linux/interval_tree.h>
>>   #include <drm/drmP.h>
>>   #include <drm/drm.h>
>> @@ -58,7 +58,6 @@
>>    *
>>    * @adev: amdgpu device pointer
>>    * @mm: process address space
>> - * @mn: MMU notifier structure
>>    * @type: type of MMU notifier
>>    * @work: destruction work item
>>    * @node: hash table node to find structure by adev and mn @@ -66,6 +65,7
>> @@
>>    * @objects: interval tree containing amdgpu_mn_nodes
>>    * @read_lock: mutex for recursive locking of @lock
>>    * @recursion: depth of recursion
>> + * @mirror: HMM mirror function support
>>    *
>>    * Data for each amdgpu device and process address space.
>>    */
>> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>>   	/* constant after initialisation */
>>   	struct amdgpu_device	*adev;
>>   	struct mm_struct	*mm;
>> -	struct mmu_notifier	mn;
>>   	enum amdgpu_mn_type	type;
>>
>>   	/* only used on destruction */
>> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>>   	struct rb_root_cached	objects;
>>   	struct mutex		read_lock;
>>   	atomic_t		recursion;
>> +
>> +	/* HMM mirror */
>> +	struct hmm_mirror	mirror;
>>   };
>>
>>   /**
>> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {  };
>>
>>   /**
>> - * amdgpu_mn_destroy - destroy the MMU notifier
>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>    *
>>    * @work: previously sheduled work item
>>    *
>> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct
>> *work)
>>   	}
>>   	up_write(&amn->lock);
>>   	mutex_unlock(&adev->mn_lock);
>> -	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>> +
>> +	hmm_mirror_unregister(&amn->mirror);
>>   	kfree(amn);
>>   }
>>
>>   /**
>> - * amdgpu_mn_release - callback to notify about mm destruction
>> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> + * @mirror: the HMM mirror (mm) this callback is about
>>    *
>> - * Shedule a work item to lazy destroy our notifier.
>> + * Shedule a work item to lazy destroy HMM mirror.
>>    */
>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>> -			      struct mm_struct *mm)
>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>   {
>> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
>> mn);
>> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>> +mirror);
>>
>>   	INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>   	schedule_work(&amn->work);
>>   }
>>
>> -
>>   /**
>>    * amdgpu_mn_lock - take the write side lock for this notifier
>>    *
>> @@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct
>> amdgpu_mn_node *node,
>>   /**
>>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm
>> change
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * Block for operations on BOs to finish and mark pages as accessed and
>>    * potentially dirty.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>> -						 struct mm_struct *mm,
>> -						 unsigned long start,
>> -						 unsigned long end,
>> -						 bool blockable)
>> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
>> +			const struct hmm_update *update)
>>   {
>> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
>> mn);
>> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>> mirror);
>> +	unsigned long start = update->start;
>> +	unsigned long end = update->end;
>> +	bool blockable = update->blockable;
>>   	struct interval_tree_node *it;
>>
>>   	/* notification is exclusive, but interval is inclusive */ @@ -278,28
>> +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct
>> mmu_notifier *mn,
>>   		amdgpu_mn_invalidate_node(node, start, end);
>>   	}
>>
>> +	amdgpu_mn_read_unlock(amn);
>> +
>>   	return 0;
>>   }
>>
>>   /**
>>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm
>> change
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * We temporarily evict all BOs between start and end. This
>>    * necessitates evicting all user-mode queues of the process. The BOs
>>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier
>> *mn,
>> -						 struct mm_struct *mm,
>> -						 unsigned long start,
>> -						 unsigned long end,
>> -						 bool blockable)
>> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
>> +			const struct hmm_update *update)
>>   {
>> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
>> mn);
>> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>> mirror);
>> +	unsigned long start = update->start;
>> +	unsigned long end = update->end;
>> +	bool blockable = update->blockable;
>>   	struct interval_tree_node *it;
>>
>>   	/* notification is exclusive, but interval is inclusive */ @@ -326,59
>> +324,39 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct
>> mmu_notifier *mn,
>>
>>   			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>   							 start, end))
>> -				amdgpu_amdkfd_evict_userptr(mem, mm);
>> +				amdgpu_amdkfd_evict_userptr(mem, amn-
>>> mm);
>>   		}
>>   	}
>>
>> +	amdgpu_mn_read_unlock(amn);
>> +
>>   	return 0;
>>   }
>>
>> -/**
>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>> - *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> - *
>> - * Release the lock again to allow new command submissions.
>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>> + * alignment. Use these bits to make a unique key from the mm pointer
>> + * and notifier type.
>>    */
>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>> -					   struct mm_struct *mm,
>> -					   unsigned long start,
>> -					   unsigned long end)
>> -{
>> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
>> mn);
>> -
>> -	amdgpu_mn_read_unlock(amn);
>> -}
>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>
>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>   	[AMDGPU_MN_TYPE_GFX] = {
>> -		.release = amdgpu_mn_release,
>> -		.invalidate_range_start =
>> amdgpu_mn_invalidate_range_start_gfx,
>> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +		.sync_cpu_device_pagetables =
>> amdgpu_mn_sync_pagetables_gfx,
>> +		.release = amdgpu_hmm_mirror_release
>>   	},
>>   	[AMDGPU_MN_TYPE_HSA] = {
>> -		.release = amdgpu_mn_release,
>> -		.invalidate_range_start =
>> amdgpu_mn_invalidate_range_start_hsa,
>> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +		.sync_cpu_device_pagetables =
>> amdgpu_mn_sync_pagetables_hsa,
>> +		.release = amdgpu_hmm_mirror_release
>>   	},
>>   };
>>
>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>> - * alignment. Use these bits to make a unique key from the mm pointer
>> - * and notifier type.
>> - */
>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -
>>   /**
>> - * amdgpu_mn_get - create notifier context
>> + * amdgpu_mn_get - create HMM mirror context
>>    *
>>    * @adev: amdgpu device pointer
>>    * @type: type of MMU notifier context
>>    *
>> - * Creates a notifier context for current->mm.
>> + * Creates a HMM mirror context for current->mm.
>>    */
>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>   				enum amdgpu_mn_type type)
>> @@ -408,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>> amdgpu_device *adev,
>>   	amn->mm = mm;
>>   	init_rwsem(&amn->lock);
>>   	amn->type = type;
>> -	amn->mn.ops = &amdgpu_mn_ops[type];
>>   	amn->objects = RB_ROOT_CACHED;
>>   	mutex_init(&amn->read_lock);
>>   	atomic_set(&amn->recursion, 0);
>>
>> -	r = __mmu_notifier_register(&amn->mn, mm);
>> +	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>> +	r = hmm_mirror_register(&amn->mirror, mm);
>>   	if (r)
>>   		goto free_amn;
>>
>> @@ -439,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>> amdgpu_device *adev,
>>    * @bo: amdgpu buffer object
>>    * @addr: userptr addr we should monitor
>>    *
>> - * Registers an MMU notifier for the given BO at the specified address.
>> + * Registers an HMM mirror for the given BO at the specified address.
>>    * Returns 0 on success, -ERRNO if anything goes wrong.
>>    */
>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) @@ -
>> 495,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo,
>> unsigned long addr)  }
>>
>>   /**
>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>    *
>>    * @bo: amdgpu buffer object
>>    *
>> - * Remove any registration of MMU notifier updates from the buffer object.
>> + * Remove any registration of HMM mirror updates from the buffer object.
>>    */
>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)  { diff --git
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index eb0f432f78fe..0a51fd00021c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>   	AMDGPU_MN_TYPE_HSA,
>>   };
>>
>> -#if defined(CONFIG_MMU_NOTIFIER)
>> +#if defined(CONFIG_HMM_MIRROR)
>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);  void
>> amdgpu_mn_unlock(struct amdgpu_mn *mn);  struct amdgpu_mn
>> *amdgpu_mn_get(struct amdgpu_device *adev,
>> --
>> 2.17.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
       [not found]             ` <ee46d8e7-cf64-eb8c-cfd6-793db28c6c69-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-04 18:09               ` Christian König
  0 siblings, 0 replies; 13+ messages in thread
From: Christian König @ 2019-02-04 18:09 UTC (permalink / raw)
  To: Yang, Philip, Koenig, Christian,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 04.02.19 um 18:17 schrieb Yang, Philip:
>
> On 2019-02-04 10:18 a.m., Christian König wrote:
>> Am 04.02.19 um 16:06 schrieb Yang, Philip:
>>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>> DRM_AMDGPU_USERPTR Kconfig.
>>>
>>> It supports both KFD userptr and gfx userptr paths.
>>>
>>> The dependent HMM patches from Jérôme Glisse are all merged into the
>>> 4.20.0 kernel now.
>>>
>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>>    drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 139 +++++++++++--------------
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>>    4 files changed, 67 insertions(+), 82 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> index 9221e5489069..960a63355705 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>>    config DRM_AMDGPU_USERPTR
>>>        bool "Always enable userptr write support"
>>>        depends on DRM_AMDGPU
>>> -    select MMU_NOTIFIER
>>> +    select HMM_MIRROR
>>>        help
>>> -      This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>> -      selected to enabled full userptr support.
>>> +      This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>>> +      isn't already selected to enabled full userptr support.
>>>    config DRM_AMDGPU_GART_DEBUGFS
>>>        bool "Allow GART access through debugfs"
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 466da5954a68..851001ced5e8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -172,7 +172,7 @@ endif
>>>    amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>>    amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>>    amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>>>    include $(FULL_AMD_PATH)/powerplay/Makefile
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> index 3e6823fdd939..5d518d2bb9be 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> @@ -45,7 +45,7 @@
>>>    #include <linux/firmware.h>
>>>    #include <linux/module.h>
>>> -#include <linux/mmu_notifier.h>
>>> +#include <linux/hmm.h>
>>>    #include <linux/interval_tree.h>
>>>    #include <drm/drmP.h>
>>>    #include <drm/drm.h>
>>> @@ -58,7 +58,6 @@
>>>     *
>>>     * @adev: amdgpu device pointer
>>>     * @mm: process address space
>>> - * @mn: MMU notifier structure
>>>     * @type: type of MMU notifier
>>>     * @work: destruction work item
>>>     * @node: hash table node to find structure by adev and mn
>>> @@ -66,6 +65,7 @@
>>>     * @objects: interval tree containing amdgpu_mn_nodes
>>>     * @read_lock: mutex for recursive locking of @lock
>>>     * @recursion: depth of recursion
>>> + * @mirror: HMM mirror function support
>>>     *
>>>     * Data for each amdgpu device and process address space.
>>>     */
>>> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>>>        /* constant after initialisation */
>>>        struct amdgpu_device    *adev;
>>>        struct mm_struct    *mm;
>>> -    struct mmu_notifier    mn;
>>>        enum amdgpu_mn_type    type;
>>>        /* only used on destruction */
>>> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>>>        struct rb_root_cached    objects;
>>>        struct mutex        read_lock;
>>>        atomic_t        recursion;
>> With HMM we don't need this any more. Please remove it and simplify
>> amdgpu_mn_read_lock() and amdgpu_mn_read_unlock().
>>
> Thanks, this makes sense because HMM already uses hmm->mirror_sem to
> serialize the invalidate range callbacks.
>
> amn->read_lock is also not needed anymore; it was only used to protect
> the atomic_inc_return(amn->recursion) and down_read(amn->lock)
> operations.
>
> Just one amn->lock is needed to synchronize amdgpu_cs_submit with
> userptr updates. I will submit another patch.
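>
> Roughly, with only amn->lock left, the helpers become a plain rwsem
> (sketch for the follow-up patch):
>
> static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
> {
> 	if (blockable)
> 		down_read(&amn->lock);
> 	else if (!down_read_trylock(&amn->lock))
> 		return -EAGAIN;
>
> 	return 0;
> }
>
> static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
> {
> 	up_read(&amn->lock);
> }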

Sounds good. I will take a closer look at the other patches tomorrow.

Thanks,
Christian.

>
> Philip
>
>> Apart from that looks good to me,
>> Christian.
>>
>>> +
>>> +    /* HMM mirror */
>>> +    struct hmm_mirror    mirror;
>>>    };
>>>    /**
>>> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {
>>>    };
>>>    /**
>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>>     *
>>>     * @work: previously sheduled work item
>>>     *
>>> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct
>>> *work)
>>>        }
>>>        up_write(&amn->lock);
>>>        mutex_unlock(&adev->mn_lock);
>>> -    mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>> +
>>> +    hmm_mirror_unregister(&amn->mirror);
>>>        kfree(amn);
>>>    }
>>>    /**
>>> - * amdgpu_mn_release - callback to notify about mm destruction
>>> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>>>     *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> + * @mirror: the HMM mirror (mm) this callback is about
>>>     *
>>> - * Shedule a work item to lazy destroy our notifier.
>>> + * Shedule a work item to lazy destroy HMM mirror.
>>>     */
>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>> -                  struct mm_struct *mm)
>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>>    {
>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>>> mirror);
>>>        INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>>        schedule_work(&amn->work);
>>>    }
>>> -
>>>    /**
>>>     * amdgpu_mn_lock - take the write side lock for this notifier
>>>     *
>>> @@ -237,141 +237,126 @@ static void amdgpu_mn_invalidate_node(struct
>>> amdgpu_mn_node *node,
>>>    /**
>>>     * amdgpu_mn_invalidate_range_start_gfx - callback to notify about
>>> mm change
>>>     *
>>> - * @mn: our notifier
>>> - * @range: mmu notifier context
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>>     *
>>>     * Block for operations on BOs to finish and mark pages as accessed and
>>>     * potentially dirty.
>>>     */
>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>> -            const struct mmu_notifier_range *range)
>>> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
>>> +            const struct hmm_update *update)
>>>    {
>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>>> mirror);
>>> +    unsigned long start = update->start;
>>> +    unsigned long end = update->end;
>>> +    bool blockable = update->blockable;
>>>        struct interval_tree_node *it;
>>> -    unsigned long end;
>>>        /* notification is exclusive, but interval is inclusive */
>>> -    end = range->end - 1;
>>> +    end -= 1;
>>>        /* TODO we should be able to split locking for interval tree and
>>>         * amdgpu_mn_invalidate_node
>>>         */
>>> -    if (amdgpu_mn_read_lock(amn, range->blockable))
>>> +    if (amdgpu_mn_read_lock(amn, blockable))
>>>            return -EAGAIN;
>>> -    it = interval_tree_iter_first(&amn->objects, range->start, end);
>>> +    it = interval_tree_iter_first(&amn->objects, start, end);
>>>        while (it) {
>>>            struct amdgpu_mn_node *node;
>>> -        if (!range->blockable) {
>>> +        if (!blockable) {
>>>                amdgpu_mn_read_unlock(amn);
>>>                return -EAGAIN;
>>>            }
>>>            node = container_of(it, struct amdgpu_mn_node, it);
>>> -        it = interval_tree_iter_next(it, range->start, end);
>>> +        it = interval_tree_iter_next(it, start, end);
>>> -        amdgpu_mn_invalidate_node(node, range->start, end);
>>> +        amdgpu_mn_invalidate_node(node, start, end);
>>>        }
>>> +    amdgpu_mn_read_unlock(amn);
>>> +
>>>        return 0;
>>>    }
>>>    /**
>>>     * amdgpu_mn_invalidate_range_start_hsa - callback to notify about
>>> mm change
>>>     *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>>     *
>>>     * We temporarily evict all BOs between start and end. This
>>>     * necessitates evicting all user-mode queues of the process. The BOs
>>>     * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>>     */
>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>> -            const struct mmu_notifier_range *range)
>>> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
>>> +            const struct hmm_update *update)
>>>    {
>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>>> mirror);
>>> +    unsigned long start = update->start;
>>> +    unsigned long end = update->end;
>>> +    bool blockable = update->blockable;
>>>        struct interval_tree_node *it;
>>> -    unsigned long end;
>>>        /* notification is exclusive, but interval is inclusive */
>>> -    end = range->end - 1;
>>> +    end -= 1;
>>> -    if (amdgpu_mn_read_lock(amn, range->blockable))
>>> +    if (amdgpu_mn_read_lock(amn, blockable))
>>>            return -EAGAIN;
>>> -    it = interval_tree_iter_first(&amn->objects, range->start, end);
>>> +    it = interval_tree_iter_first(&amn->objects, start, end);
>>>        while (it) {
>>>            struct amdgpu_mn_node *node;
>>>            struct amdgpu_bo *bo;
>>> -        if (!range->blockable) {
>>> +        if (!blockable) {
>>>                amdgpu_mn_read_unlock(amn);
>>>                return -EAGAIN;
>>>            }
>>>            node = container_of(it, struct amdgpu_mn_node, it);
>>> -        it = interval_tree_iter_next(it, range->start, end);
>>> +        it = interval_tree_iter_next(it, start, end);
>>>            list_for_each_entry(bo, &node->bos, mn_list) {
>>>                struct kgd_mem *mem = bo->kfd_bo;
>>>                if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>> -                             range->start,
>>> -                             end))
>>> -                amdgpu_amdkfd_evict_userptr(mem, range->mm);
>>> +                             start, end))
>>> +                amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>>            }
>>>        }
>>> +    amdgpu_mn_read_unlock(amn);
>>> +
>>>        return 0;
>>>    }
>>> -/**
>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>>> - *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> - *
>>> - * Release the lock again to allow new command submissions.
>>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>>> + * alignment. Use these bits to make a unique key from the mm pointer
>>> + * and notifier type.
>>>     */
>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>> -            const struct mmu_notifier_range *range)
>>> -{
>>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> -
>>> -    amdgpu_mn_read_unlock(amn);
>>> -}
>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>>        [AMDGPU_MN_TYPE_GFX] = {
>>> -        .release = amdgpu_mn_release,
>>> -        .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> +        .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
>>> +        .release = amdgpu_hmm_mirror_release
>>>        },
>>>        [AMDGPU_MN_TYPE_HSA] = {
>>> -        .release = amdgpu_mn_release,
>>> -        .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> +        .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
>>> +        .release = amdgpu_hmm_mirror_release
>>>        },
>>>    };
>>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>>> - * alignment. Use these bits to make a unique key from the mm pointer
>>> - * and notifier type.
>>> - */
>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>> -
>>>    /**
>>> - * amdgpu_mn_get - create notifier context
>>> + * amdgpu_mn_get - create HMM mirror context
>>>     *
>>>     * @adev: amdgpu device pointer
>>>     * @type: type of MMU notifier context
>>>     *
>>> - * Creates a notifier context for current->mm.
>>> + * Creates a HMM mirror context for current->mm.
>>>     */
>>>    struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>                    enum amdgpu_mn_type type)
>>> @@ -401,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>        amn->mm = mm;
>>>        init_rwsem(&amn->lock);
>>>        amn->type = type;
>>> -    amn->mn.ops = &amdgpu_mn_ops[type];
>>>        amn->objects = RB_ROOT_CACHED;
>>>        mutex_init(&amn->read_lock);
>>>        atomic_set(&amn->recursion, 0);
>>> -    r = __mmu_notifier_register(&amn->mn, mm);
>>> +    amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>> +    r = hmm_mirror_register(&amn->mirror, mm);
>>>        if (r)
>>>            goto free_amn;
>>> @@ -432,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>     * @bo: amdgpu buffer object
>>>     * @addr: userptr addr we should monitor
>>>     *
>>> - * Registers an MMU notifier for the given BO at the specified address.
>>> + * Registers an HMM mirror for the given BO at the specified address.
>>>     * Returns 0 on success, -ERRNO if anything goes wrong.
>>>     */
>>>    int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>> @@ -488,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>>    }
>>>    /**
>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>>     *
>>>     * @bo: amdgpu buffer object
>>>     *
>>> - * Remove any registration of MMU notifier updates from the buffer object.
>>> + * Remove any registration of HMM mirror updates from the buffer object.
>>>     */
>>>    void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>    {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> index eb0f432f78fe..0a51fd00021c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>>        AMDGPU_MN_TYPE_HSA,
>>>    };
>>> -#if defined(CONFIG_MMU_NOTIFIER)
>>> +#if defined(CONFIG_HMM_MIRROR)
>>>    void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>>    void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>>    struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
       [not found]         ` <71e6094c-a257-7aa3-854c-9a9dff163b01-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-02-04 17:17           ` Yang, Philip
       [not found]             ` <ee46d8e7-cf64-eb8c-cfd6-793db28c6c69-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Yang, Philip @ 2019-02-04 17:17 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW



On 2019-02-04 10:18 a.m., Christian König wrote:
> Am 04.02.19 um 16:06 schrieb Yang, Philip:
>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>> DRM_AMDGPU_USERPTR Kconfig.
>>
>> It supports both KFD userptr and gfx userptr paths.
>>
>> The dependent HMM patchset from Jérôme Glisse is now merged into the
>> 4.20 kernel.
>>
>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 139 +++++++++++--------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>>   4 files changed, 67 insertions(+), 82 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig 
>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 9221e5489069..960a63355705 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>   config DRM_AMDGPU_USERPTR
>>       bool "Always enable userptr write support"
>>       depends on DRM_AMDGPU
>> -    select MMU_NOTIFIER
>> +    select HMM_MIRROR
>>       help
>> -      This option selects CONFIG_MMU_NOTIFIER if it isn't already
>> -      selected to enabled full userptr support.
>> +      This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>> +      isn't already selected to enabled full userptr support.
>>   config DRM_AMDGPU_GART_DEBUGFS
>>       bool "Allow GART access through debugfs"
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 466da5954a68..851001ced5e8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -172,7 +172,7 @@ endif
>>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>>   include $(FULL_AMD_PATH)/powerplay/Makefile
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index 3e6823fdd939..5d518d2bb9be 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -45,7 +45,7 @@
>>   #include <linux/firmware.h>
>>   #include <linux/module.h>
>> -#include <linux/mmu_notifier.h>
>> +#include <linux/hmm.h>
>>   #include <linux/interval_tree.h>
>>   #include <drm/drmP.h>
>>   #include <drm/drm.h>
>> @@ -58,7 +58,6 @@
>>    *
>>    * @adev: amdgpu device pointer
>>    * @mm: process address space
>> - * @mn: MMU notifier structure
>>    * @type: type of MMU notifier
>>    * @work: destruction work item
>>    * @node: hash table node to find structure by adev and mn
>> @@ -66,6 +65,7 @@
>>    * @objects: interval tree containing amdgpu_mn_nodes
>>    * @read_lock: mutex for recursive locking of @lock
>>    * @recursion: depth of recursion
>> + * @mirror: HMM mirror function support
>>    *
>>    * Data for each amdgpu device and process address space.
>>    */
>> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>>       /* constant after initialisation */
>>       struct amdgpu_device    *adev;
>>       struct mm_struct    *mm;
>> -    struct mmu_notifier    mn;
>>       enum amdgpu_mn_type    type;
>>       /* only used on destruction */
>> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>>       struct rb_root_cached    objects;
>>       struct mutex        read_lock;
> 
>>       atomic_t        recursion;
> 
> With HMM we don't need this any more. Please remove it and simplify 
> amdgpu_mn_read_lock() and amdgpu_mn_read_unlock().
> 
Thanks, this makes sense because HMM uses hmm->mirror_sem to serialize 
the invalidate ranges.

amn->read_lock is also not needed anymore; it was only there to protect 
the atomic operations atomic_inc_return(amn->recursion) and 
down_read(amn->lock) against each other.

Just one amn->lock is needed to synchronize amdgpu_cs_submit with 
userptr updates. I will submit another patch.
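
For reference, a rough sketch of what the simplified 
amdgpu_mn_read_lock()/amdgpu_mn_read_unlock() could look like, assuming 
only the amn->lock rwsem remains (just an illustration of the idea, the 
actual follow-up patch may differ):

static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
	/* Hypothetical simplification: hmm_mirror already serializes the
	 * sync_cpu_device_pagetables callbacks, so only the rwsem against
	 * command submission is left.
	 */
	if (blockable)
		down_read(&amn->lock);
	else if (!down_read_trylock(&amn->lock))
		return -EAGAIN;

	return 0;
}

static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
	up_read(&amn->lock);
}

The non-blockable path would still return -EAGAIN so the caller can 
retry, same as today.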

Philip

> Apart from that looks good to me,
> Christian.
> 
>> +
>> +    /* HMM mirror */
>> +    struct hmm_mirror    mirror;
>>   };
>>   /**
>> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {
>>   };
>>   /**
>> - * amdgpu_mn_destroy - destroy the MMU notifier
>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>    *
>>    * @work: previously sheduled work item
>>    *
>> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct 
>> *work)
>>       }
>>       up_write(&amn->lock);
>>       mutex_unlock(&adev->mn_lock);
>> -    mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>> +
>> +    hmm_mirror_unregister(&amn->mirror);
>>       kfree(amn);
>>   }
>>   /**
>> - * amdgpu_mn_release - callback to notify about mm destruction
>> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> + * @mirror: the HMM mirror (mm) this callback is about
>>    *
>> - * Shedule a work item to lazy destroy our notifier.
>> + * Shedule a work item to lazy destroy HMM mirror.
>>    */
>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>> -                  struct mm_struct *mm)
>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>   {
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>> mirror);
>>       INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>       schedule_work(&amn->work);
>>   }
>> -
>>   /**
>>    * amdgpu_mn_lock - take the write side lock for this notifier
>>    *
>> @@ -237,141 +237,126 @@ static void amdgpu_mn_invalidate_node(struct 
>> amdgpu_mn_node *node,
>>   /**
>>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify about 
>> mm change
>>    *
>> - * @mn: our notifier
>> - * @range: mmu notifier context
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * Block for operations on BOs to finish and mark pages as accessed and
>>    * potentially dirty.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>> -            const struct mmu_notifier_range *range)
>> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
>> +            const struct hmm_update *update)
>>   {
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>> mirror);
>> +    unsigned long start = update->start;
>> +    unsigned long end = update->end;
>> +    bool blockable = update->blockable;
>>       struct interval_tree_node *it;
>> -    unsigned long end;
>>       /* notification is exclusive, but interval is inclusive */
>> -    end = range->end - 1;
>> +    end -= 1;
>>       /* TODO we should be able to split locking for interval tree and
>>        * amdgpu_mn_invalidate_node
>>        */
>> -    if (amdgpu_mn_read_lock(amn, range->blockable))
>> +    if (amdgpu_mn_read_lock(amn, blockable))
>>           return -EAGAIN;
>> -    it = interval_tree_iter_first(&amn->objects, range->start, end);
>> +    it = interval_tree_iter_first(&amn->objects, start, end);
>>       while (it) {
>>           struct amdgpu_mn_node *node;
>> -        if (!range->blockable) {
>> +        if (!blockable) {
>>               amdgpu_mn_read_unlock(amn);
>>               return -EAGAIN;
>>           }
>>           node = container_of(it, struct amdgpu_mn_node, it);
>> -        it = interval_tree_iter_next(it, range->start, end);
>> +        it = interval_tree_iter_next(it, start, end);
>> -        amdgpu_mn_invalidate_node(node, range->start, end);
>> +        amdgpu_mn_invalidate_node(node, start, end);
>>       }
>> +    amdgpu_mn_read_unlock(amn);
>> +
>>       return 0;
>>   }
>>   /**
>>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify about 
>> mm change
>>    *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>>    *
>>    * We temporarily evict all BOs between start and end. This
>>    * necessitates evicting all user-mode queues of the process. The BOs
>>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>    */
>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>> -            const struct mmu_notifier_range *range)
>> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
>> +            const struct hmm_update *update)
>>   {
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> +    struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, 
>> mirror);
>> +    unsigned long start = update->start;
>> +    unsigned long end = update->end;
>> +    bool blockable = update->blockable;
>>       struct interval_tree_node *it;
>> -    unsigned long end;
>>       /* notification is exclusive, but interval is inclusive */
>> -    end = range->end - 1;
>> +    end -= 1;
>> -    if (amdgpu_mn_read_lock(amn, range->blockable))
>> +    if (amdgpu_mn_read_lock(amn, blockable))
>>           return -EAGAIN;
>> -    it = interval_tree_iter_first(&amn->objects, range->start, end);
>> +    it = interval_tree_iter_first(&amn->objects, start, end);
>>       while (it) {
>>           struct amdgpu_mn_node *node;
>>           struct amdgpu_bo *bo;
>> -        if (!range->blockable) {
>> +        if (!blockable) {
>>               amdgpu_mn_read_unlock(amn);
>>               return -EAGAIN;
>>           }
>>           node = container_of(it, struct amdgpu_mn_node, it);
>> -        it = interval_tree_iter_next(it, range->start, end);
>> +        it = interval_tree_iter_next(it, start, end);
>>           list_for_each_entry(bo, &node->bos, mn_list) {
>>               struct kgd_mem *mem = bo->kfd_bo;
>>               if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>> -                             range->start,
>> -                             end))
>> -                amdgpu_amdkfd_evict_userptr(mem, range->mm);
>> +                             start, end))
>> +                amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>           }
>>       }
>> +    amdgpu_mn_read_unlock(amn);
>> +
>>       return 0;
>>   }
>> -/**
>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>> - *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> - *
>> - * Release the lock again to allow new command submissions.
>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>> + * alignment. Use these bits to make a unique key from the mm pointer
>> + * and notifier type.
>>    */
>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>> -            const struct mmu_notifier_range *range)
>> -{
>> -    struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> -
>> -    amdgpu_mn_read_unlock(amn);
>> -}
>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>       [AMDGPU_MN_TYPE_GFX] = {
>> -        .release = amdgpu_mn_release,
>> -        .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +        .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
>> +        .release = amdgpu_hmm_mirror_release
>>       },
>>       [AMDGPU_MN_TYPE_HSA] = {
>> -        .release = amdgpu_mn_release,
>> -        .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>> -        .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> +        .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
>> +        .release = amdgpu_hmm_mirror_release
>>       },
>>   };
>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>> - * alignment. Use these bits to make a unique key from the mm pointer
>> - * and notifier type.
>> - */
>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -
>>   /**
>> - * amdgpu_mn_get - create notifier context
>> + * amdgpu_mn_get - create HMM mirror context
>>    *
>>    * @adev: amdgpu device pointer
>>    * @type: type of MMU notifier context
>>    *
>> - * Creates a notifier context for current->mm.
>> + * Creates a HMM mirror context for current->mm.
>>    */
>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>                   enum amdgpu_mn_type type)
>> @@ -401,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>> amdgpu_device *adev,
>>       amn->mm = mm;
>>       init_rwsem(&amn->lock);
>>       amn->type = type;
>> -    amn->mn.ops = &amdgpu_mn_ops[type];
>>       amn->objects = RB_ROOT_CACHED;
>>       mutex_init(&amn->read_lock);
>>       atomic_set(&amn->recursion, 0);
>> -    r = __mmu_notifier_register(&amn->mn, mm);
>> +    amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>> +    r = hmm_mirror_register(&amn->mirror, mm);
>>       if (r)
>>           goto free_amn;
>> @@ -432,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct 
>> amdgpu_device *adev,
>>    * @bo: amdgpu buffer object
>>    * @addr: userptr addr we should monitor
>>    *
>> - * Registers an MMU notifier for the given BO at the specified address.
>> + * Registers an HMM mirror for the given BO at the specified address.
>>    * Returns 0 on success, -ERRNO if anything goes wrong.
>>    */
>>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>> @@ -488,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, 
>> unsigned long addr)
>>   }
>>   /**
>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>    *
>>    * @bo: amdgpu buffer object
>>    *
>> - * Remove any registration of MMU notifier updates from the buffer 
>> object.
>> + * Remove any registration of HMM mirror updates from the buffer object.
>>    */
>>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index eb0f432f78fe..0a51fd00021c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>       AMDGPU_MN_TYPE_HSA,
>>   };
>> -#if defined(CONFIG_MMU_NOTIFIER)
>> +#if defined(CONFIG_HMM_MIRROR)
>>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
       [not found]     ` <20190204150613.5837-2-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-04 15:18       ` Christian König
       [not found]         ` <71e6094c-a257-7aa3-854c-9a9dff163b01-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Christian König @ 2019-02-04 15:18 UTC (permalink / raw)
  To: Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 04.02.19 um 16:06 schrieb Yang, Philip:
> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
> DRM_AMDGPU_USERPTR Kconfig.
>
> It supports both KFD userptr and gfx userptr paths.
>
> The dependent HMM patchset from Jérôme Glisse is now merged into the
> 4.20 kernel.
>
> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
>   drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 139 +++++++++++--------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
>   4 files changed, 67 insertions(+), 82 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
> index 9221e5489069..960a63355705 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>   config DRM_AMDGPU_USERPTR
>   	bool "Always enable userptr write support"
>   	depends on DRM_AMDGPU
> -	select MMU_NOTIFIER
> +	select HMM_MIRROR
>   	help
> -	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
> -	  selected to enabled full userptr support.
> +	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
> +	  isn't already selected to enabled full userptr support.
>   
>   config DRM_AMDGPU_GART_DEBUGFS
>   	bool "Allow GART access through debugfs"
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 466da5954a68..851001ced5e8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -172,7 +172,7 @@ endif
>   amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>   amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>   amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>   
>   include $(FULL_AMD_PATH)/powerplay/Makefile
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 3e6823fdd939..5d518d2bb9be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -45,7 +45,7 @@
>   
>   #include <linux/firmware.h>
>   #include <linux/module.h>
> -#include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
>   #include <linux/interval_tree.h>
>   #include <drm/drmP.h>
>   #include <drm/drm.h>
> @@ -58,7 +58,6 @@
>    *
>    * @adev: amdgpu device pointer
>    * @mm: process address space
> - * @mn: MMU notifier structure
>    * @type: type of MMU notifier
>    * @work: destruction work item
>    * @node: hash table node to find structure by adev and mn
> @@ -66,6 +65,7 @@
>    * @objects: interval tree containing amdgpu_mn_nodes
>    * @read_lock: mutex for recursive locking of @lock
>    * @recursion: depth of recursion
> + * @mirror: HMM mirror function support
>    *
>    * Data for each amdgpu device and process address space.
>    */
> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>   	/* constant after initialisation */
>   	struct amdgpu_device	*adev;
>   	struct mm_struct	*mm;
> -	struct mmu_notifier	mn;
>   	enum amdgpu_mn_type	type;
>   
>   	/* only used on destruction */
> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>   	struct rb_root_cached	objects;
>   	struct mutex		read_lock;

>   	atomic_t		recursion;

With HMM we don't need this any more. Please remove it and simplify 
amdgpu_mn_read_lock() and amdgpu_mn_read_unlock().

Apart from that looks good to me,
Christian.

> +
> +	/* HMM mirror */
> +	struct hmm_mirror	mirror;
>   };
>   
>   /**
> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {
>   };
>   
>   /**
> - * amdgpu_mn_destroy - destroy the MMU notifier
> + * amdgpu_mn_destroy - destroy the HMM mirror
>    *
>    * @work: previously sheduled work item
>    *
> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
>   	}
>   	up_write(&amn->lock);
>   	mutex_unlock(&adev->mn_lock);
> -	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
> +
> +	hmm_mirror_unregister(&amn->mirror);
>   	kfree(amn);
>   }
>   
>   /**
> - * amdgpu_mn_release - callback to notify about mm destruction
> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>    *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> + * @mirror: the HMM mirror (mm) this callback is about
>    *
> - * Shedule a work item to lazy destroy our notifier.
> + * Shedule a work item to lazy destroy HMM mirror.
>    */
> -static void amdgpu_mn_release(struct mmu_notifier *mn,
> -			      struct mm_struct *mm)
> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>   {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>   
>   	INIT_WORK(&amn->work, amdgpu_mn_destroy);
>   	schedule_work(&amn->work);
>   }
>   
> -
>   /**
>    * amdgpu_mn_lock - take the write side lock for this notifier
>    *
> @@ -237,141 +237,126 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>   /**
>    * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
>    *
> - * @mn: our notifier
> - * @range: mmu notifier context
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
>    *
>    * Block for operations on BOs to finish and mark pages as accessed and
>    * potentially dirty.
>    */
> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
> -			const struct mmu_notifier_range *range)
> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
> +			const struct hmm_update *update)
>   {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>   	struct interval_tree_node *it;
> -	unsigned long end;
>   
>   	/* notification is exclusive, but interval is inclusive */
> -	end = range->end - 1;
> +	end -= 1;
>   
>   	/* TODO we should be able to split locking for interval tree and
>   	 * amdgpu_mn_invalidate_node
>   	 */
> -	if (amdgpu_mn_read_lock(amn, range->blockable))
> +	if (amdgpu_mn_read_lock(amn, blockable))
>   		return -EAGAIN;
>   
> -	it = interval_tree_iter_first(&amn->objects, range->start, end);
> +	it = interval_tree_iter_first(&amn->objects, start, end);
>   	while (it) {
>   		struct amdgpu_mn_node *node;
>   
> -		if (!range->blockable) {
> +		if (!blockable) {
>   			amdgpu_mn_read_unlock(amn);
>   			return -EAGAIN;
>   		}
>   
>   		node = container_of(it, struct amdgpu_mn_node, it);
> -		it = interval_tree_iter_next(it, range->start, end);
> +		it = interval_tree_iter_next(it, start, end);
>   
> -		amdgpu_mn_invalidate_node(node, range->start, end);
> +		amdgpu_mn_invalidate_node(node, start, end);
>   	}
>   
> +	amdgpu_mn_read_unlock(amn);
> +
>   	return 0;
>   }
>   
>   /**
>    * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
>    *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
>    *
>    * We temporarily evict all BOs between start and end. This
>    * necessitates evicting all user-mode queues of the process. The BOs
>    * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>    */
> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
> -			const struct mmu_notifier_range *range)
> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
> +			const struct hmm_update *update)
>   {
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> +	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> +	unsigned long start = update->start;
> +	unsigned long end = update->end;
> +	bool blockable = update->blockable;
>   	struct interval_tree_node *it;
> -	unsigned long end;
>   
>   	/* notification is exclusive, but interval is inclusive */
> -	end = range->end - 1;
> +	end -= 1;
>   
> -	if (amdgpu_mn_read_lock(amn, range->blockable))
> +	if (amdgpu_mn_read_lock(amn, blockable))
>   		return -EAGAIN;
>   
> -	it = interval_tree_iter_first(&amn->objects, range->start, end);
> +	it = interval_tree_iter_first(&amn->objects, start, end);
>   	while (it) {
>   		struct amdgpu_mn_node *node;
>   		struct amdgpu_bo *bo;
>   
> -		if (!range->blockable) {
> +		if (!blockable) {
>   			amdgpu_mn_read_unlock(amn);
>   			return -EAGAIN;
>   		}
>   
>   		node = container_of(it, struct amdgpu_mn_node, it);
> -		it = interval_tree_iter_next(it, range->start, end);
> +		it = interval_tree_iter_next(it, start, end);
>   
>   		list_for_each_entry(bo, &node->bos, mn_list) {
>   			struct kgd_mem *mem = bo->kfd_bo;
>   
>   			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
> -							 range->start,
> -							 end))
> -				amdgpu_amdkfd_evict_userptr(mem, range->mm);
> +							 start, end))
> +				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>   		}
>   	}
>   
> +	amdgpu_mn_read_unlock(amn);
> +
>   	return 0;
>   }
>   
> -/**
> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
> - *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> - *
> - * Release the lock again to allow new command submissions.
> +/* Low bits of any reasonable mm pointer will be unused due to struct
> + * alignment. Use these bits to make a unique key from the mm pointer
> + * and notifier type.
>    */
> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
> -			const struct mmu_notifier_range *range)
> -{
> -	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> -
> -	amdgpu_mn_read_unlock(amn);
> -}
> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>   
> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>   	[AMDGPU_MN_TYPE_GFX] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
> +		.release = amdgpu_hmm_mirror_release
>   	},
>   	[AMDGPU_MN_TYPE_HSA] = {
> -		.release = amdgpu_mn_release,
> -		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
> -		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
> +		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
> +		.release = amdgpu_hmm_mirror_release
>   	},
>   };
>   
> -/* Low bits of any reasonable mm pointer will be unused due to struct
> - * alignment. Use these bits to make a unique key from the mm pointer
> - * and notifier type.
> - */
> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
> -
>   /**
> - * amdgpu_mn_get - create notifier context
> + * amdgpu_mn_get - create HMM mirror context
>    *
>    * @adev: amdgpu device pointer
>    * @type: type of MMU notifier context
>    *
> - * Creates a notifier context for current->mm.
> + * Creates a HMM mirror context for current->mm.
>    */
>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>   				enum amdgpu_mn_type type)
> @@ -401,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>   	amn->mm = mm;
>   	init_rwsem(&amn->lock);
>   	amn->type = type;
> -	amn->mn.ops = &amdgpu_mn_ops[type];
>   	amn->objects = RB_ROOT_CACHED;
>   	mutex_init(&amn->read_lock);
>   	atomic_set(&amn->recursion, 0);
>   
> -	r = __mmu_notifier_register(&amn->mn, mm);
> +	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
> +	r = hmm_mirror_register(&amn->mirror, mm);
>   	if (r)
>   		goto free_amn;
>   
> @@ -432,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>    * @bo: amdgpu buffer object
>    * @addr: userptr addr we should monitor
>    *
> - * Registers an MMU notifier for the given BO at the specified address.
> + * Registers an HMM mirror for the given BO at the specified address.
>    * Returns 0 on success, -ERRNO if anything goes wrong.
>    */
>   int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
> @@ -488,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>   }
>   
>   /**
> - * amdgpu_mn_unregister - unregister a BO for notifier updates
> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>    *
>    * @bo: amdgpu buffer object
>    *
> - * Remove any registration of MMU notifier updates from the buffer object.
> + * Remove any registration of HMM mirror updates from the buffer object.
>    */
>   void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>   {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index eb0f432f78fe..0a51fd00021c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>   	AMDGPU_MN_TYPE_HSA,
>   };
>   
> -#if defined(CONFIG_MMU_NOTIFIER)
> +#if defined(CONFIG_HMM_MIRROR)
>   void amdgpu_mn_lock(struct amdgpu_mn *mn);
>   void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>   struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
       [not found] ` <20190204150613.5837-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-04 15:06   ` Yang, Philip
       [not found]     ` <20190204150613.5837-2-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Yang, Philip @ 2019-02-04 15:06 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip

Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
DRM_AMDGPU_USERPTR Kconfig.

It supports both KFD userptr and gfx userptr paths.

The dependent HMM patchset from Jérôme Glisse is now merged into the
4.20 kernel.

Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
 drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 139 +++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
 4 files changed, 67 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9221e5489069..960a63355705 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
 config DRM_AMDGPU_USERPTR
 	bool "Always enable userptr write support"
 	depends on DRM_AMDGPU
-	select MMU_NOTIFIER
+	select HMM_MIRROR
 	help
-	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
-	  selected to enabled full userptr support.
+	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+	  isn't already selected to enabled full userptr support.
 
 config DRM_AMDGPU_GART_DEBUGFS
 	bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 466da5954a68..851001ced5e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -172,7 +172,7 @@ endif
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
 
 include $(FULL_AMD_PATH)/powerplay/Makefile
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 3e6823fdd939..5d518d2bb9be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
 #include <linux/interval_tree.h>
 #include <drm/drmP.h>
 #include <drm/drm.h>
@@ -58,7 +58,6 @@
  *
  * @adev: amdgpu device pointer
  * @mm: process address space
- * @mn: MMU notifier structure
  * @type: type of MMU notifier
  * @work: destruction work item
  * @node: hash table node to find structure by adev and mn
@@ -66,6 +65,7 @@
  * @objects: interval tree containing amdgpu_mn_nodes
  * @read_lock: mutex for recursive locking of @lock
  * @recursion: depth of recursion
+ * @mirror: HMM mirror function support
  *
  * Data for each amdgpu device and process address space.
  */
@@ -73,7 +73,6 @@ struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
-	struct mmu_notifier	mn;
 	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
@@ -87,6 +86,9 @@ struct amdgpu_mn {
 	struct rb_root_cached	objects;
 	struct mutex		read_lock;
 	atomic_t		recursion;
+
+	/* HMM mirror */
+	struct hmm_mirror	mirror;
 };
 
 /**
@@ -103,7 +105,7 @@ struct amdgpu_mn_node {
 };
 
 /**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
  *
  * @work: previously sheduled work item
  *
@@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	}
 	up_write(&amn->lock);
 	mutex_unlock(&adev->mn_lock);
-	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+	hmm_mirror_unregister(&amn->mirror);
 	kfree(amn);
 }
 
 /**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
  *
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
  */
-static void amdgpu_mn_release(struct mmu_notifier *mn,
-			      struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
 
 	INIT_WORK(&amn->work, amdgpu_mn_destroy);
 	schedule_work(&amn->work);
 }
 
-
 /**
  * amdgpu_mn_lock - take the write side lock for this notifier
  *
@@ -237,141 +237,126 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 /**
  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
- * @mn: our notifier
- * @range: mmu notifier context
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
-	unsigned long end;
 
 	/* notification is exclusive, but interval is inclusive */
-	end = range->end - 1;
+	end -= 1;
 
 	/* TODO we should be able to split locking for interval tree and
 	 * amdgpu_mn_invalidate_node
 	 */
-	if (amdgpu_mn_read_lock(amn, range->blockable))
+	if (amdgpu_mn_read_lock(amn, blockable))
 		return -EAGAIN;
 
-	it = interval_tree_iter_first(&amn->objects, range->start, end);
+	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 
-		if (!range->blockable) {
+		if (!blockable) {
 			amdgpu_mn_read_unlock(amn);
 			return -EAGAIN;
 		}
 
 		node = container_of(it, struct amdgpu_mn_node, it);
-		it = interval_tree_iter_next(it, range->start, end);
+		it = interval_tree_iter_next(it, start, end);
 
-		amdgpu_mn_invalidate_node(node, range->start, end);
+		amdgpu_mn_invalidate_node(node, start, end);
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
 /**
  * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * We temporarily evict all BOs between start and end. This
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
-	unsigned long end;
 
 	/* notification is exclusive, but interval is inclusive */
-	end = range->end - 1;
+	end -= 1;
 
-	if (amdgpu_mn_read_lock(amn, range->blockable))
+	if (amdgpu_mn_read_lock(amn, blockable))
 		return -EAGAIN;
 
-	it = interval_tree_iter_first(&amn->objects, range->start, end);
+	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 		struct amdgpu_bo *bo;
 
-		if (!range->blockable) {
+		if (!blockable) {
 			amdgpu_mn_read_unlock(amn);
 			return -EAGAIN;
 		}
 
 		node = container_of(it, struct amdgpu_mn_node, it);
-		it = interval_tree_iter_next(it, range->start, end);
+		it = interval_tree_iter_next(it, start, end);
 
 		list_for_each_entry(bo, &node->bos, mn_list) {
 			struct kgd_mem *mem = bo->kfd_bo;
 
 			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
-							 range->start,
-							 end))
-				amdgpu_amdkfd_evict_userptr(mem, range->mm);
+							 start, end))
+				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
 		}
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
  */
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
-			const struct mmu_notifier_range *range)
-{
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
-	amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 	[AMDGPU_MN_TYPE_GFX] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+		.release = amdgpu_hmm_mirror_release
 	},
 	[AMDGPU_MN_TYPE_HSA] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+		.release = amdgpu_hmm_mirror_release
 	},
 };
 
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
 /**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
  *
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
  */
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
@@ -401,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 	amn->mm = mm;
 	init_rwsem(&amn->lock);
 	amn->type = type;
-	amn->mn.ops = &amdgpu_mn_ops[type];
 	amn->objects = RB_ROOT_CACHED;
 	mutex_init(&amn->read_lock);
 	atomic_set(&amn->recursion, 0);
 
-	r = __mmu_notifier_register(&amn->mn, mm);
+	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+	r = hmm_mirror_register(&amn->mirror, mm);
 	if (r)
 		goto free_amn;
 
@@ -432,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
  * @bo: amdgpu buffer object
  * @addr: userptr addr we should monitor
  *
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
  * Returns 0 on success, -ERRNO if anything goes wrong.
  */
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -488,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 }
 
 /**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
  *
  * @bo: amdgpu buffer object
  *
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
  */
 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..0a51fd00021c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_HSA,
 };
 
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM_MIRROR)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
@ 2019-01-10 17:02 Yang, Philip
  0 siblings, 0 replies; 13+ messages in thread
From: Yang, Philip @ 2019-01-10 17:02 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip

Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
DRM_AMDGPU_USERPTR Kconfig.

It supports both KFD userptr and gfx userptr paths.

The dependent HMM patchset from Jérôme Glisse is now merged into the
4.20 kernel.

Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/Kconfig     |   6 +-
 drivers/gpu/drm/amd/amdgpu/Makefile    |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h |   2 +-
 4 files changed, 55 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9221e5489069..960a63355705 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
 config DRM_AMDGPU_USERPTR
 	bool "Always enable userptr write support"
 	depends on DRM_AMDGPU
-	select MMU_NOTIFIER
+	select HMM_MIRROR
 	help
-	  This option selects CONFIG_MMU_NOTIFIER if it isn't already
-	  selected to enabled full userptr support.
+	  This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+	  isn't already selected to enabled full userptr support.
 
 config DRM_AMDGPU_GART_DEBUGFS
 	bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index f76bcb9c45e4..675efc850ff4 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -172,7 +172,7 @@ endif
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
 
 include $(FULL_AMD_PATH)/powerplay/Makefile
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e55508b39496..5d518d2bb9be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
 #include <linux/interval_tree.h>
 #include <drm/drmP.h>
 #include <drm/drm.h>
@@ -58,7 +58,6 @@
  *
  * @adev: amdgpu device pointer
  * @mm: process address space
- * @mn: MMU notifier structure
  * @type: type of MMU notifier
  * @work: destruction work item
  * @node: hash table node to find structure by adev and mn
@@ -66,6 +65,7 @@
  * @objects: interval tree containing amdgpu_mn_nodes
  * @read_lock: mutex for recursive locking of @lock
  * @recursion: depth of recursion
+ * @mirror: HMM mirror function support
  *
  * Data for each amdgpu device and process address space.
  */
@@ -73,7 +73,6 @@ struct amdgpu_mn {
 	/* constant after initialisation */
 	struct amdgpu_device	*adev;
 	struct mm_struct	*mm;
-	struct mmu_notifier	mn;
 	enum amdgpu_mn_type	type;
 
 	/* only used on destruction */
@@ -87,6 +86,9 @@ struct amdgpu_mn {
 	struct rb_root_cached	objects;
 	struct mutex		read_lock;
 	atomic_t		recursion;
+
+	/* HMM mirror */
+	struct hmm_mirror	mirror;
 };
 
 /**
@@ -103,7 +105,7 @@ struct amdgpu_mn_node {
 };
 
 /**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
  *
  * @work: previously sheduled work item
  *
@@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
 	}
 	up_write(&amn->lock);
 	mutex_unlock(&adev->mn_lock);
-	mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+	hmm_mirror_unregister(&amn->mirror);
 	kfree(amn);
 }
 
 /**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
  *
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
  */
-static void amdgpu_mn_release(struct mmu_notifier *mn,
-			      struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
 
 	INIT_WORK(&amn->work, amdgpu_mn_destroy);
 	schedule_work(&amn->work);
 }
 
-
 /**
  * amdgpu_mn_lock - take the write side lock for this notifier
  *
@@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 /**
  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
@@ -278,28 +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 		amdgpu_mn_invalidate_node(node, start, end);
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
 /**
  * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
  *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
  *
  * We temporarily evict all BOs between start and end. This
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+			const struct hmm_update *update)
 {
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+	unsigned long start = update->start;
+	unsigned long end = update->end;
+	bool blockable = update->blockable;
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
@@ -326,59 +324,39 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 
 			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
 							 start, end))
-				amdgpu_amdkfd_evict_userptr(mem, mm);
+				amdgpu_amdkfd_evict_userptr(mem, amn->mm);
 		}
 	}
 
+	amdgpu_mn_read_unlock(amn);
+
 	return 0;
 }
 
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
  */
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
-					   struct mm_struct *mm,
-					   unsigned long start,
-					   unsigned long end)
-{
-	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
-	amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
 
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 	[AMDGPU_MN_TYPE_GFX] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+		.release = amdgpu_hmm_mirror_release
 	},
 	[AMDGPU_MN_TYPE_HSA] = {
-		.release = amdgpu_mn_release,
-		.invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
-		.invalidate_range_end = amdgpu_mn_invalidate_range_end,
+		.sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+		.release = amdgpu_hmm_mirror_release
 	},
 };
 
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
 /**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
  *
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
  */
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
@@ -408,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
 	amn->mm = mm;
 	init_rwsem(&amn->lock);
 	amn->type = type;
-	amn->mn.ops = &amdgpu_mn_ops[type];
 	amn->objects = RB_ROOT_CACHED;
 	mutex_init(&amn->read_lock);
 	atomic_set(&amn->recursion, 0);
 
-	r = __mmu_notifier_register(&amn->mn, mm);
+	amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+	r = hmm_mirror_register(&amn->mirror, mm);
 	if (r)
 		goto free_amn;
 
@@ -439,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
  * @bo: amdgpu buffer object
  * @addr: userptr addr we should monitor
  *
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
  * Returns 0 on success, -ERRNO if anything goes wrong.
  */
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -495,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 }
 
 /**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
  *
  * @bo: amdgpu buffer object
  *
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
  */
 void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..0a51fd00021c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_HSA,
 };
 
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM_MIRROR)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
 struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2019-02-04 18:09 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-06 21:02 [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6 Yang, Philip
     [not found] ` <20181206210235.12036-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2018-12-06 21:02   ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency Yang, Philip
2018-12-06 21:02   ` [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v3 Yang, Philip
     [not found]     ` <20181206210235.12036-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2018-12-11  0:12       ` Kuehling, Felix
     [not found]         ` <b5ff9fa4-97c9-4524-d4dd-34c8003274fa-5C7GfCeVMHo@public.gmane.org>
2018-12-11  8:27           ` Christian König
2018-12-13 20:56           ` Yang, Philip
2018-12-07 12:00   ` [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6 Zhou, David(ChunMing)
     [not found]     ` <BY1PR12MB0502882984A59380E5F17F66B4AA0-PicGAnIBOobrCwm+z9iKNgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-12-13 21:15       ` Yang, Philip
2019-01-10 17:02 Yang, Philip
2019-02-04 15:06 [PATCH 0/3] Use HMM to replace get_user_pages Yang, Philip
     [not found] ` <20190204150613.5837-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-04 15:06   ` [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6 Yang, Philip
     [not found]     ` <20190204150613.5837-2-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-04 15:18       ` Christian König
     [not found]         ` <71e6094c-a257-7aa3-854c-9a9dff163b01-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-02-04 17:17           ` Yang, Philip
     [not found]             ` <ee46d8e7-cf64-eb8c-cfd6-793db28c6c69-5C7GfCeVMHo@public.gmane.org>
2019-02-04 18:09               ` Christian König
