* [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
@ 2019-01-10 17:02 Yang, Philip
[not found] ` <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 11+ messages in thread
From: Yang, Philip @ 2019-01-10 17:02 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip
Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
DRM_AMDGPU_USERPTR Kconfig.
It supports both KFD userptr and gfx userptr paths.
The dependent HMM patchset from Jérôme Glisse has been merged into the
4.20.0 kernel now.
Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++++++++++---------------
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
4 files changed, 55 insertions(+), 77 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9221e5489069..960a63355705 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- select MMU_NOTIFIER
+ select HMM_MIRROR
help
- This option selects CONFIG_MMU_NOTIFIER if it isn't already
- selected to enabled full userptr support.
+ This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+ isn't already selected to enabled full userptr support.
config DRM_AMDGPU_GART_DEBUGFS
bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index f76bcb9c45e4..675efc850ff4 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -172,7 +172,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
include $(FULL_AMD_PATH)/powerplay/Makefile
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e55508b39496..5d518d2bb9be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
#include <linux/interval_tree.h>
#include <drm/drmP.h>
#include <drm/drm.h>
@@ -58,7 +58,6 @@
*
* @adev: amdgpu device pointer
* @mm: process address space
- * @mn: MMU notifier structure
* @type: type of MMU notifier
* @work: destruction work item
* @node: hash table node to find structure by adev and mn
@@ -66,6 +65,7 @@
* @objects: interval tree containing amdgpu_mn_nodes
* @read_lock: mutex for recursive locking of @lock
* @recursion: depth of recursion
+ * @mirror: HMM mirror function support
*
* Data for each amdgpu device and process address space.
*/
@@ -73,7 +73,6 @@ struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
- struct mmu_notifier mn;
enum amdgpu_mn_type type;
/* only used on destruction */
@@ -87,6 +86,9 @@ struct amdgpu_mn {
struct rb_root_cached objects;
struct mutex read_lock;
atomic_t recursion;
+
+ /* HMM mirror */
+ struct hmm_mirror mirror;
};
/**
@@ -103,7 +105,7 @@ struct amdgpu_mn_node {
};
/**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
*
* @work: previously sheduled work item
*
@@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
}
up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
- mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+ hmm_mirror_unregister(&amn->mirror);
kfree(amn);
}
/**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
*
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
*
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
*/
-static void amdgpu_mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
INIT_WORK(&amn->work, amdgpu_mn_destroy);
schedule_work(&amn->work);
}
-
/**
* amdgpu_mn_lock - take the write side lock for this notifier
*
@@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
/**
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- bool blockable)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
@@ -278,28 +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
amdgpu_mn_invalidate_node(node, start, end);
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
/**
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
*
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* We temporarily evict all BOs between start and end. This
* necessitates evicting all user-mode queues of the process. The BOs
* are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- bool blockable)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
@@ -326,59 +324,39 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
start, end))
- amdgpu_amdkfd_evict_userptr(mem, mm);
+ amdgpu_amdkfd_evict_userptr(mem, amn->mm);
}
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
*/
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
- amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+ .release = amdgpu_hmm_mirror_release
},
[AMDGPU_MN_TYPE_HSA] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+ .release = amdgpu_hmm_mirror_release
},
};
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
/**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
*
* @adev: amdgpu device pointer
* @type: type of MMU notifier context
*
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
*/
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
@@ -408,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
amn->mm = mm;
init_rwsem(&amn->lock);
amn->type = type;
- amn->mn.ops = &amdgpu_mn_ops[type];
amn->objects = RB_ROOT_CACHED;
mutex_init(&amn->read_lock);
atomic_set(&amn->recursion, 0);
- r = __mmu_notifier_register(&amn->mn, mm);
+ amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+ r = hmm_mirror_register(&amn->mirror, mm);
if (r)
goto free_amn;
@@ -439,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -495,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
}
/**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..0a51fd00021c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ enum amdgpu_mn_type {
AMDGPU_MN_TYPE_HSA,
};
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM_MIRROR)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2
[not found] ` <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2019-01-10 17:02 ` Yang, Philip
2019-01-10 17:02 ` [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6 Yang, Philip
1 sibling, 0 replies; 11+ messages in thread
From: Yang, Philip @ 2019-01-10 17:02 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip
There is a circular lock dependency between the gfx and kfd paths with the HMM change:
lock(dqm) -> bo::reserve -> amdgpu_mn_lock
To avoid this, move init/uninit_mqd() out of lock(dqm), to remove nested
locking between mmap_sem and bo::reserve. The locking order
is: bo::reserve -> amdgpu_mn_lock(p->mn)
Change-Id: I2ec09a47571f6b4c8eaef93f22c0a600f5f70153
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 32 ++++++++++---------
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..efe0d3c0215b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1156,21 +1156,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
int retval;
struct mqd_manager *mqd_mgr;
- retval = 0;
-
- dqm_lock(dqm);
-
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
pr_warn("Can't create new usermode queue because %d queues were already created\n",
dqm->total_queue_count);
retval = -EPERM;
- goto out_unlock;
+ goto out;
}
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
retval = allocate_sdma_queue(dqm, &q->sdma_id);
if (retval)
- goto out_unlock;
+ goto out;
q->properties.sdma_queue_id =
q->sdma_id / get_num_sdma_engines(dqm);
q->properties.sdma_engine_id =
@@ -1181,6 +1177,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (retval)
goto out_deallocate_sdma_queue;
+ /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
+ * lock(dqm) -> bo::reserve
+ */
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
@@ -1188,6 +1187,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = -ENOMEM;
goto out_deallocate_doorbell;
}
+
/*
* Eviction state logic: we only mark active queues as evicted
* to avoid the overhead of restoring inactive queues later
@@ -1196,9 +1196,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
q->properties.is_evicted = (q->properties.queue_size > 0 &&
q->properties.queue_percent > 0 &&
q->properties.queue_address != 0);
-
dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
q->properties.tba_addr = qpd->tba_addr;
q->properties.tma_addr = qpd->tma_addr;
retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1206,6 +1204,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (retval)
goto out_deallocate_doorbell;
+ dqm_lock(dqm);
+
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
if (q->properties.is_active) {
@@ -1233,9 +1233,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
out_deallocate_sdma_queue:
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q->sdma_id);
-out_unlock:
- dqm_unlock(dqm);
-
+out:
return retval;
}
@@ -1398,8 +1396,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
qpd->reset_wavefronts = true;
}
- mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
/*
* Unconditionally decrement this counter, regardless of the queue's
* type
@@ -1410,6 +1406,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
dqm_unlock(dqm);
+ /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+ mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
return retval;
failed:
@@ -1631,7 +1630,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
qpd->reset_wavefronts = false;
}
- /* lastly, free mqd resources */
+ dqm_unlock(dqm);
+
+ /* Lastly, free mqd resources.
+ * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+ */
list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
mqd_mgr = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
@@ -1645,7 +1648,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
}
out:
- dqm_unlock(dqm);
return retval;
}
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6
[not found] ` <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-01-10 17:02 ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip
@ 2019-01-10 17:02 ` Yang, Philip
[not found] ` <20190110170228.10917-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
1 sibling, 1 reply; 11+ messages in thread
From: Yang, Philip @ 2019-01-10 17:02 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip
Use HMM helper function hmm_vma_fault() to get physical pages backing
userptr and start CPU page table update track of those pages. Then use
hmm_vma_range_done() to check if those pages are updated before
amdgpu_cs_submit for gfx or before user queues are resumed for kfd.
If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
from scratch, for kfd, restore worker is rescheduled to retry.
HMM simplifies the concurrent CPU page table update check, so remove
guptasklock, mmu_invalidations, last_set_pages fields from
amdgpu_ttm_tt struct.
HMM does not pin the page (increase page ref count), so remove related
operations like release_pages(), put_page(), mark_page_dirty().
Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 -
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-------
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 158 +++++++---------
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 173 ++++++++----------
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +-
9 files changed, 198 insertions(+), 277 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0b31a1859023..0e1711a75b68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -61,7 +61,6 @@ struct kgd_mem {
atomic_t invalid;
struct amdkfd_process_info *process_info;
- struct page **user_pages;
struct amdgpu_sync sync;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index d7b10d79f1de..ae2d838d31ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
goto out;
}
- /* If no restore worker is running concurrently, user_pages
- * should not be allocated
- */
- WARN(mem->user_pages, "Leaking user_pages array");
-
- mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!mem->user_pages) {
- pr_err("%s: Failed to allocate pages array\n", __func__);
- ret = -ENOMEM;
- goto unregister_out;
- }
-
- ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
- goto free_out;
+ goto unregister_out;
}
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
-
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
@@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
amdgpu_bo_unreserve(bo);
release_out:
- if (ret)
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
-free_out:
- kvfree(mem->user_pages);
- mem->user_pages = NULL;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
- ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
- ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
i = 0;
@@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
- /* Free user pages if necessary */
- if (mem->user_pages) {
- pr_debug("%s: Freeing user_pages array\n", __func__);
- if (mem->user_pages[0])
- release_pages(mem->user_pages,
- mem->bo->tbo.ttm->num_pages);
- kvfree(mem->user_pages);
- }
-
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
- if (!mem->user_pages) {
- mem->user_pages =
- kvmalloc_array(bo->tbo.ttm->num_pages,
- sizeof(struct page *),
- GFP_KERNEL | __GFP_ZERO);
- if (!mem->user_pages) {
- pr_err("%s: Failed to allocate pages array\n",
- __func__);
- return -ENOMEM;
- }
- } else if (mem->user_pages[0]) {
- release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
- }
-
/* Get updated user pages */
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
- mem->user_pages);
+ bo->tbo.ttm->pages);
if (ret) {
- mem->user_pages[0] = NULL;
+ bo->tbo.ttm->pages[0] = NULL;
pr_info("%s: Failed to get user pages: %d\n",
__func__, ret);
/* Pretend it succeeded. It will fail later
@@ -1882,12 +1837,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
* stalled user mode queues.
*/
}
-
- /* Mark the BO as valid unless it was invalidated
- * again concurrently
- */
- if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
- return -EAGAIN;
}
return 0;
@@ -1917,7 +1866,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
GFP_KERNEL);
if (!pd_bo_list_entries) {
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_no_mem;
}
INIT_LIST_HEAD(&resv_list);
@@ -1941,7 +1891,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
- goto out;
+ goto out_free;
amdgpu_sync_create(&sync);
@@ -1967,10 +1917,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
bo = mem->bo;
- /* Copy pages array and validate the BO if we got user pages */
- if (mem->user_pages[0]) {
- amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
- mem->user_pages);
+ /* Validate the BO if we got user pages */
+ if (bo->tbo.ttm->pages[0]) {
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
@@ -1979,16 +1927,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
- /* Validate succeeded, now the BO owns the pages, free
- * our copy of the pointer array. Put this BO back on
- * the userptr_valid_list. If we need to revalidate
- * it, we need to start from scratch.
- */
- kvfree(mem->user_pages);
- mem->user_pages = NULL;
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);
+ /* Stop HMM track the userptr update. We dont check the return
+ * value for concurrent CPU page table update because we will
+ * reschedule the restore worker if process_info->evicted_bos
+ * is updated.
+ */
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
/* Update mapping. If the BO was not validated
* (because we couldn't get user pages), this will
* clear the page table entries, which will result in
@@ -2022,8 +1970,15 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
-out:
+out_free:
kfree(pd_bo_list_entries);
+out_no_mem:
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list.head) {
+ bo = mem->bo;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1601e6..a130e766cbdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo_va *bo_va;
uint32_t priority;
struct page **user_pages;
- int user_invalidated;
+ bool user_invalidated;
};
struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1c49b8266d69..451a1fab17d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
p->uf_entry.tv.bo = &bo->tbo;
/* One for TTM and one for the CS job */
p->uf_entry.tv.num_shared = 2;
- p->uf_entry.user_pages = NULL;
drm_gem_object_put_unlocked(gobj);
@@ -539,14 +538,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
if (usermm && usermm != current->mm)
return -EPERM;
- /* Check if we have user pages and nobody bound the BO already */
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
- lobj->user_pages) {
+ if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+ lobj->user_invalidated && lobj->user_pages) {
amdgpu_bo_placement_from_domain(bo,
AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
+
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
lobj->user_pages);
binding_userptr = true;
@@ -577,7 +576,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_bo *gds;
struct amdgpu_bo *gws;
struct amdgpu_bo *oa;
- unsigned tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
@@ -613,79 +611,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
list_add(&p->uf_entry.tv.head, &p->validated);
- while (1) {
- struct list_head need_pages;
-
- r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
- &duplicates);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
- goto error_free_pages;
- }
-
- INIT_LIST_HEAD(&need_pages);
- amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
-
- if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
- &e->user_invalidated) && e->user_pages) {
-
- /* We acquired a page array, but somebody
- * invalidated it. Free it and try again
- */
- release_pages(e->user_pages,
- bo->tbo.ttm->num_pages);
- kvfree(e->user_pages);
- e->user_pages = NULL;
- }
-
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
- !e->user_pages) {
- list_del(&e->tv.head);
- list_add(&e->tv.head, &need_pages);
-
- amdgpu_bo_unreserve(bo);
- }
+ /* Get userptr backing pages. If pages are updated after registered
+ * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
+ * amdgpu_ttm_backend_bind() to flush and invalidate new pages
+ */
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ bool userpage_invalidated = false;
+ int i;
+
+ e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!e->user_pages) {
+ DRM_ERROR("calloc failure\n");
+ return -ENOMEM;
}
- if (list_empty(&need_pages))
- break;
-
- /* Unreserve everything again. */
- ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
- /* We tried too many times, just abort */
- if (!--tries) {
- r = -EDEADLK;
- DRM_ERROR("deadlock in %s\n", __func__);
- goto error_free_pages;
+ r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
+ if (r) {
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
+ return r;
}
- /* Fill the page arrays for all userptrs. */
- list_for_each_entry(e, &need_pages, tv.head) {
- struct ttm_tt *ttm = e->tv.bo->ttm;
-
- e->user_pages = kvmalloc_array(ttm->num_pages,
- sizeof(struct page*),
- GFP_KERNEL | __GFP_ZERO);
- if (!e->user_pages) {
- r = -ENOMEM;
- DRM_ERROR("calloc failure in %s\n", __func__);
- goto error_free_pages;
- }
-
- r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
- if (r) {
- DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
- kvfree(e->user_pages);
- e->user_pages = NULL;
- goto error_free_pages;
+ for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+ if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
+ userpage_invalidated = true;
+ break;
}
}
+ e->user_invalidated = userpage_invalidated;
+ }
- /* And try again. */
- list_splice(&need_pages, &p->validated);
+ r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+ &duplicates);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+ goto out;
}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -754,17 +718,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
error_validate:
if (r)
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
- amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- if (!e->user_pages)
- continue;
-
- release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
- kvfree(e->user_pages);
- }
-
+out:
return r;
}
@@ -1213,8 +1167,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
uint64_t seq;
-
- int r;
+ int r = 0;
job = p->job;
p->job = NULL;
@@ -1223,15 +1176,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
- /* No memory allocation is allowed while holding the mn lock */
+ /* No memory allocation is allowed while holding the mn lock.
+ * p->mn is hold until amdgpu_cs_submit is finished and fence is added
+ * to BOs.
+ */
amdgpu_mn_lock(p->mn);
+
+ /* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
- r = -ERESTARTSYS;
- goto error_abort;
- }
+ r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ }
+ if (r) {
+ r = -ERESTARTSYS;
+ goto error_abort;
}
job->owner = p->filp;
@@ -1278,14 +1237,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
- union drm_amdgpu_cs *cs = data;
- struct amdgpu_cs_parser parser = {};
- bool reserved_buffers = false;
+ union drm_amdgpu_cs *cs;
+ struct amdgpu_cs_parser parser;
+ bool reserved_buffers;
+ int tries = 10;
int i, r;
if (!adev->accel_working)
return -EBUSY;
+restart:
+ memset(&parser, 0, sizeof(parser));
+ cs = data;
+ reserved_buffers = false;
+
parser.adev = adev;
parser.filp = filp;
@@ -1327,6 +1292,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
+ if (r == -ERESTARTSYS) {
+ if (!--tries) {
+ DRM_ERROR("Possible deadlock? Retry too many times\n");
+ return -EDEADLK;
+ }
+ goto restart;
+ }
+
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index f4f00217546e..92025993bfb1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -336,26 +336,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
r = amdgpu_bo_reserve(bo, true);
if (r)
- goto free_pages;
+ goto user_pages_done;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (r)
- goto free_pages;
+ goto user_pages_done;
}
r = drm_gem_handle_create(filp, gobj, &handle);
- /* drop reference from allocate - handle holds it now */
- drm_gem_object_put_unlocked(gobj);
if (r)
- return r;
+ goto user_pages_done;
args->handle = handle;
- return 0;
-free_pages:
- release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
+user_pages_done:
+ if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
release_object:
drm_gem_object_put_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 5d518d2bb9be..9d40f3001f3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
true, false, MAX_SCHEDULE_TIMEOUT);
if (r <= 0)
DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
- amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
}
}
@@ -513,3 +511,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
mutex_unlock(&adev->mn_lock);
}
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+ (1 << 0), /* HMM_PFN_VALID */
+ (1 << 1), /* HMM_PFN_WRITE */
+ 0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+ 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+ 0, /* HMM_PFN_NONE */
+ 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+void amdgpu_hmm_init_range(struct hmm_range *range)
+{
+ if (range) {
+ range->flags = hmm_range_flags;
+ range->values = hmm_range_values;
+ range->pfn_shift = PAGE_SHIFT;
+ range->pfns = NULL;
+ INIT_LIST_HEAD(&range->list);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index 0a51fd00021c..4803e216e174 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -25,9 +25,10 @@
#define __AMDGPU_MN_H__
/*
- * MMU Notifier
+ * HMM mirror
*/
struct amdgpu_mn;
+struct hmm_range;
enum amdgpu_mn_type {
AMDGPU_MN_TYPE_GFX,
@@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+void amdgpu_hmm_init_range(struct hmm_range *range);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c91ec3101d00..3d074f4b02f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/iommu.h>
+#include <linux/hmm.h>
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
@@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
/*
* TTM backend functions.
*/
-struct amdgpu_ttm_gup_task_list {
- struct list_head list;
- struct task_struct *task;
-};
-
struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
@@ -718,85 +714,96 @@ struct amdgpu_ttm_tt {
uint32_t userflags;
spinlock_t guptasklock;
struct list_head guptasks;
- atomic_t mmu_invalidations;
- uint32_t last_set_pages;
+ struct hmm_range range;
};
/**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking CPU page table update
*
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
+ * once afterwards to stop HMM tracking
*/
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
struct mm_struct *mm = gtt->usertask->mm;
- unsigned int flags = 0;
- unsigned pinned = 0;
- int r;
+ unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+ struct hmm_range *range = &gtt->range;
+ int r = 0, i;
if (!mm) /* Happens during process shutdown */
return -ESRCH;
- if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
- flags |= FOLL_WRITE;
+ amdgpu_hmm_init_range(range);
down_read(&mm->mmap_sem);
- if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
- /*
- * check that we only use anonymous memory to prevent problems
- * with writeback
- */
- unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
- struct vm_area_struct *vma;
+ range->vma = find_vma(mm, gtt->userptr);
+ if (!range_in_vma(range->vma, gtt->userptr, end))
+ r = -EFAULT;
+ else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ range->vma->vm_file)
+ r = -EPERM;
+ if (r)
+ goto out;
- vma = find_vma(mm, gtt->userptr);
- if (!vma || vma->vm_file || vma->vm_end < end) {
- up_read(&mm->mmap_sem);
- return -EPERM;
- }
+ range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
+ GFP_KERNEL);
+ if (range->pfns == NULL) {
+ r = -ENOMEM;
+ goto out;
}
+ range->start = gtt->userptr;
+ range->end = end;
- /* loop enough times using contiguous pages of memory */
- do {
- unsigned num_pages = ttm->num_pages - pinned;
- uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
- struct page **p = pages + pinned;
- struct amdgpu_ttm_gup_task_list guptask;
+ range->pfns[0] = range->flags[HMM_PFN_VALID];
+ range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
+ 0 : range->flags[HMM_PFN_WRITE];
+ for (i = 1; i < ttm->num_pages; i++)
+ range->pfns[i] = range->pfns[0];
- guptask.task = current;
- spin_lock(&gtt->guptasklock);
- list_add(&guptask.list, &gtt->guptasks);
- spin_unlock(&gtt->guptasklock);
+ /* This may trigger page table update */
+ r = hmm_vma_fault(range, true);
+ if (r)
+ goto out_free_pfns;
- if (mm == current->mm)
- r = get_user_pages(userptr, num_pages, flags, p, NULL);
- else
- r = get_user_pages_remote(gtt->usertask,
- mm, userptr, num_pages,
- flags, p, NULL, NULL);
+ up_read(&mm->mmap_sem);
- spin_lock(&gtt->guptasklock);
- list_del(&guptask.list);
- spin_unlock(&gtt->guptasklock);
+ for (i = 0; i < ttm->num_pages; i++)
+ pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
- if (r < 0)
- goto release_pages;
+ return 0;
- pinned += r;
+out_free_pfns:
+ kvfree(range->pfns);
+ range->pfns = NULL;
+out:
+ up_read(&mm->mmap_sem);
+ return r;
+}
- } while (pinned < ttm->num_pages);
+/**
+ * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * Check if the pages backing this ttm range have been invalidated
+ *
+ * Returns: true if pages are still valid
+ */
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ bool r = false;
- up_read(&mm->mmap_sem);
- return 0;
+ if (!gtt || !gtt->userptr)
+ return false;
+
+ WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");
+ if (gtt->range.pfns) {
+ r = hmm_vma_range_done(&gtt->range);
+ kvfree(gtt->range.pfns);
+ gtt->range.pfns = NULL;
+ }
-release_pages:
- release_pages(pages, pinned);
- up_read(&mm->mmap_sem);
return r;
}
@@ -809,16 +816,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
*/
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned i;
- gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
- for (i = 0; i < ttm->num_pages; ++i) {
- if (ttm->pages[i])
- put_page(ttm->pages[i]);
-
+ for (i = 0; i < ttm->num_pages; ++i)
ttm->pages[i] = pages ? pages[i] : NULL;
- }
}
/**
@@ -903,10 +904,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
/* unmap the pages mapped to the device */
dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- /* mark the pages as dirty */
- amdgpu_ttm_tt_mark_user_pages(ttm);
-
sg_free_table(ttm->sg);
+
+ if (gtt->range.pfns &&
+ ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0]))
+ WARN_ONCE(1, "Missing get_user_page_done\n");
}
int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -1258,8 +1260,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
spin_lock_init(&gtt->guptasklock);
INIT_LIST_HEAD(&gtt->guptasks);
- atomic_set(&gtt->mmu_invalidations, 0);
- gtt->last_set_pages = 0;
return 0;
}
@@ -1289,7 +1289,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct amdgpu_ttm_gup_task_list *entry;
unsigned long size;
if (gtt == NULL || !gtt->userptr)
@@ -1302,48 +1301,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
- /* Search the lists of tasks that hold this mapping and see
- * if current is one of them. If it is return false.
- */
- spin_lock(&gtt->guptasklock);
- list_for_each_entry(entry, &gtt->guptasks, list) {
- if (entry->task == current) {
- spin_unlock(&gtt->guptasklock);
- return false;
- }
- }
- spin_unlock(&gtt->guptasklock);
-
- atomic_inc(&gtt->mmu_invalidations);
-
return true;
}
/**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
- */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
- int *last_invalidated)
-{
- struct amdgpu_ttm_tt *gtt = (void *)ttm;
- int prev_invalidated = *last_invalidated;
-
- *last_invalidated = atomic_read(>t->mmu_invalidations);
- return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
+ * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
*/
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
if (gtt == NULL || !gtt->userptr)
return false;
- return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+ return true;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b5b2d101f7db..8988c87fff9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
@@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end);
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated);
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6
[not found] ` <20190110170228.10917-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2019-01-14 15:33 ` Yang, Philip
0 siblings, 0 replies; 11+ messages in thread
From: Yang, Philip @ 2019-01-14 15:33 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Ping Christian, any comments for the GEM and CS part changes?
Thanks. Philip
On 2019-01-10 12:02 p.m., Yang, Philip wrote:
> Use HMM helper function hmm_vma_fault() to get physical pages backing
> userptr and start CPU page table update track of those pages. Then use
> hmm_vma_range_done() to check if those pages are updated before
> amdgpu_cs_submit for gfx or before user queues are resumed for kfd.
>
> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart
> from scratch, for kfd, restore worker is rescheduled to retry.
>
> HMM simplify the CPU page table concurrent update check, so remove
> guptasklock, mmu_invalidations, last_set_pages fields from
> amdgpu_ttm_tt struct.
>
> HMM does not pin the page (increase page ref count), so remove related
> operations like release_pages(), put_page(), mark_page_dirty().
>
> Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 -
> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++-------
> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 158 +++++++---------
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 173 ++++++++----------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +-
> 9 files changed, 198 insertions(+), 277 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 0b31a1859023..0e1711a75b68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -61,7 +61,6 @@ struct kgd_mem {
>
> atomic_t invalid;
> struct amdkfd_process_info *process_info;
> - struct page **user_pages;
>
> struct amdgpu_sync sync;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index d7b10d79f1de..ae2d838d31ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
> goto out;
> }
>
> - /* If no restore worker is running concurrently, user_pages
> - * should not be allocated
> - */
> - WARN(mem->user_pages, "Leaking user_pages array");
> -
> - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
> - sizeof(struct page *),
> - GFP_KERNEL | __GFP_ZERO);
> - if (!mem->user_pages) {
> - pr_err("%s: Failed to allocate pages array\n", __func__);
> - ret = -ENOMEM;
> - goto unregister_out;
> - }
> -
> - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
> + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
> if (ret) {
> pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
> - goto free_out;
> + goto unregister_out;
> }
>
> - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
> -
> ret = amdgpu_bo_reserve(bo, true);
> if (ret) {
> pr_err("%s: Failed to reserve BO\n", __func__);
> @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
> amdgpu_bo_unreserve(bo);
>
> release_out:
> - if (ret)
> - release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
> -free_out:
> - kvfree(mem->user_pages);
> - mem->user_pages = NULL;
> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> unregister_out:
> if (ret)
> amdgpu_mn_unregister(bo);
> @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
> ctx->kfd_bo.priority = 0;
> ctx->kfd_bo.tv.bo = &bo->tbo;
> ctx->kfd_bo.tv.num_shared = 1;
> - ctx->kfd_bo.user_pages = NULL;
> list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>
> amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
> @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
> ctx->kfd_bo.priority = 0;
> ctx->kfd_bo.tv.bo = &bo->tbo;
> ctx->kfd_bo.tv.num_shared = 1;
> - ctx->kfd_bo.user_pages = NULL;
> list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>
> i = 0;
> @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
> list_del(&bo_list_entry->head);
> mutex_unlock(&process_info->lock);
>
> - /* Free user pages if necessary */
> - if (mem->user_pages) {
> - pr_debug("%s: Freeing user_pages array\n", __func__);
> - if (mem->user_pages[0])
> - release_pages(mem->user_pages,
> - mem->bo->tbo.ttm->num_pages);
> - kvfree(mem->user_pages);
> - }
> -
> ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
> if (unlikely(ret))
> return ret;
> @@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>
> bo = mem->bo;
>
> - if (!mem->user_pages) {
> - mem->user_pages =
> - kvmalloc_array(bo->tbo.ttm->num_pages,
> - sizeof(struct page *),
> - GFP_KERNEL | __GFP_ZERO);
> - if (!mem->user_pages) {
> - pr_err("%s: Failed to allocate pages array\n",
> - __func__);
> - return -ENOMEM;
> - }
> - } else if (mem->user_pages[0]) {
> - release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
> - }
> -
> /* Get updated user pages */
> ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
> - mem->user_pages);
> + bo->tbo.ttm->pages);
> if (ret) {
> - mem->user_pages[0] = NULL;
> + bo->tbo.ttm->pages[0] = NULL;
> pr_info("%s: Failed to get user pages: %d\n",
> __func__, ret);
> /* Pretend it succeeded. It will fail later
> @@ -1882,12 +1837,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
> * stalled user mode queues.
> */
> }
> -
> - /* Mark the BO as valid unless it was invalidated
> - * again concurrently
> - */
> - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
> - return -EAGAIN;
> }
>
> return 0;
> @@ -1917,7 +1866,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
> GFP_KERNEL);
> if (!pd_bo_list_entries) {
> pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
> - return -ENOMEM;
> + ret = -ENOMEM;
> + goto out_no_mem;
> }
>
> INIT_LIST_HEAD(&resv_list);
> @@ -1941,7 +1891,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
> ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
> WARN(!list_empty(&duplicates), "Duplicates should be empty");
> if (ret)
> - goto out;
> + goto out_free;
>
> amdgpu_sync_create(&sync);
>
> @@ -1967,10 +1917,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>
> bo = mem->bo;
>
> - /* Copy pages array and validate the BO if we got user pages */
> - if (mem->user_pages[0]) {
> - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
> - mem->user_pages);
> + /* Validate the BO if we got user pages */
> + if (bo->tbo.ttm->pages[0]) {
> amdgpu_bo_placement_from_domain(bo, mem->domain);
> ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> if (ret) {
> @@ -1979,16 +1927,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
> }
> }
>
> - /* Validate succeeded, now the BO owns the pages, free
> - * our copy of the pointer array. Put this BO back on
> - * the userptr_valid_list. If we need to revalidate
> - * it, we need to start from scratch.
> - */
> - kvfree(mem->user_pages);
> - mem->user_pages = NULL;
> list_move_tail(&mem->validate_list.head,
> &process_info->userptr_valid_list);
>
> + /* Stop HMM track the userptr update. We dont check the return
> + * value for concurrent CPU page table update because we will
> + * reschedule the restore worker if process_info->evicted_bos
> + * is updated.
> + */
> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> +
> /* Update mapping. If the BO was not validated
> * (because we couldn't get user pages), this will
> * clear the page table entries, which will result in
> @@ -2022,8 +1970,15 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
> ttm_eu_backoff_reservation(&ticket, &resv_list);
> amdgpu_sync_wait(&sync, false);
> amdgpu_sync_free(&sync);
> -out:
> +out_free:
> kfree(pd_bo_list_entries);
> +out_no_mem:
> + list_for_each_entry_safe(mem, tmp_mem,
> + &process_info->userptr_inval_list,
> + validate_list.head) {
> + bo = mem->bo;
> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> + }
>
> return ret;
> }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index 7c5f5d1601e6..a130e766cbdb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
> struct amdgpu_bo_va *bo_va;
> uint32_t priority;
> struct page **user_pages;
> - int user_invalidated;
> + bool user_invalidated;
> };
>
> struct amdgpu_bo_list {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1c49b8266d69..451a1fab17d6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
> p->uf_entry.tv.bo = &bo->tbo;
> /* One for TTM and one for the CS job */
> p->uf_entry.tv.num_shared = 2;
> - p->uf_entry.user_pages = NULL;
>
> drm_gem_object_put_unlocked(gobj);
>
> @@ -539,14 +538,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
> if (usermm && usermm != current->mm)
> return -EPERM;
>
> - /* Check if we have user pages and nobody bound the BO already */
> - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
> - lobj->user_pages) {
> + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
> + lobj->user_invalidated && lobj->user_pages) {
> amdgpu_bo_placement_from_domain(bo,
> AMDGPU_GEM_DOMAIN_CPU);
> r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> if (r)
> return r;
> +
> amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
> lobj->user_pages);
> binding_userptr = true;
> @@ -577,7 +576,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> struct amdgpu_bo *gds;
> struct amdgpu_bo *gws;
> struct amdgpu_bo *oa;
> - unsigned tries = 10;
> int r;
>
> INIT_LIST_HEAD(&p->validated);
> @@ -613,79 +611,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
> list_add(&p->uf_entry.tv.head, &p->validated);
>
> - while (1) {
> - struct list_head need_pages;
> -
> - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
> - &duplicates);
> - if (unlikely(r != 0)) {
> - if (r != -ERESTARTSYS)
> - DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> - goto error_free_pages;
> - }
> -
> - INIT_LIST_HEAD(&need_pages);
> - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> -
> - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
> - &e->user_invalidated) && e->user_pages) {
> -
> - /* We acquired a page array, but somebody
> - * invalidated it. Free it and try again
> - */
> - release_pages(e->user_pages,
> - bo->tbo.ttm->num_pages);
> - kvfree(e->user_pages);
> - e->user_pages = NULL;
> - }
> -
> - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
> - !e->user_pages) {
> - list_del(&e->tv.head);
> - list_add(&e->tv.head, &need_pages);
> -
> - amdgpu_bo_unreserve(bo);
> - }
> + /* Get userptr backing pages. If pages are updated after registered
> + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
> + * amdgpu_ttm_backend_bind() to flush and invalidate new pages
> + */
> + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> + bool userpage_invalidated = false;
> + int i;
> +
> + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
> + sizeof(struct page *),
> + GFP_KERNEL | __GFP_ZERO);
> + if (!e->user_pages) {
> + DRM_ERROR("calloc failure\n");
> + return -ENOMEM;
> }
>
> - if (list_empty(&need_pages))
> - break;
> -
> - /* Unreserve everything again. */
> - ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> -
> - /* We tried too many times, just abort */
> - if (!--tries) {
> - r = -EDEADLK;
> - DRM_ERROR("deadlock in %s\n", __func__);
> - goto error_free_pages;
> + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
> + if (r) {
> + kvfree(e->user_pages);
> + e->user_pages = NULL;
> + return r;
> }
>
> - /* Fill the page arrays for all userptrs. */
> - list_for_each_entry(e, &need_pages, tv.head) {
> - struct ttm_tt *ttm = e->tv.bo->ttm;
> -
> - e->user_pages = kvmalloc_array(ttm->num_pages,
> - sizeof(struct page*),
> - GFP_KERNEL | __GFP_ZERO);
> - if (!e->user_pages) {
> - r = -ENOMEM;
> - DRM_ERROR("calloc failure in %s\n", __func__);
> - goto error_free_pages;
> - }
> -
> - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
> - if (r) {
> - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
> - kvfree(e->user_pages);
> - e->user_pages = NULL;
> - goto error_free_pages;
> + for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
> + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
> + userpage_invalidated = true;
> + break;
> }
> }
> + e->user_invalidated = userpage_invalidated;
> + }
>
> - /* And try again. */
> - list_splice(&need_pages, &p->validated);
> + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
> + &duplicates);
> + if (unlikely(r != 0)) {
> + if (r != -ERESTARTSYS)
> + DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
> + goto out;
> }
>
> amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
> @@ -754,17 +718,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
> error_validate:
> if (r)
> ttm_eu_backoff_reservation(&p->ticket, &p->validated);
> -
> -error_free_pages:
> -
> - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> - if (!e->user_pages)
> - continue;
> -
> - release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
> - kvfree(e->user_pages);
> - }
> -
> +out:
> return r;
> }
>
> @@ -1213,8 +1167,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> struct amdgpu_bo_list_entry *e;
> struct amdgpu_job *job;
> uint64_t seq;
> -
> - int r;
> + int r = 0;
>
> job = p->job;
> p->job = NULL;
> @@ -1223,15 +1176,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> if (r)
> goto error_unlock;
>
> - /* No memory allocation is allowed while holding the mn lock */
> + /* No memory allocation is allowed while holding the mn lock.
> + * p->mn is hold until amdgpu_cs_submit is finished and fence is added
> + * to BOs.
> + */
> amdgpu_mn_lock(p->mn);
> +
> + /* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */
> amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>
> - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
> - r = -ERESTARTSYS;
> - goto error_abort;
> - }
> + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> + }
> + if (r) {
> + r = -ERESTARTSYS;
> + goto error_abort;
> }
>
> job->owner = p->filp;
> @@ -1278,14 +1237,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> {
> struct amdgpu_device *adev = dev->dev_private;
> - union drm_amdgpu_cs *cs = data;
> - struct amdgpu_cs_parser parser = {};
> - bool reserved_buffers = false;
> + union drm_amdgpu_cs *cs;
> + struct amdgpu_cs_parser parser;
> + bool reserved_buffers;
> + int tries = 10;
> int i, r;
>
> if (!adev->accel_working)
> return -EBUSY;
>
> +restart:
> + memset(&parser, 0, sizeof(parser));
> + cs = data;
> + reserved_buffers = false;
> +
> parser.adev = adev;
> parser.filp = filp;
>
> @@ -1327,6 +1292,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>
> out:
> amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
> +
> + if (r == -ERESTARTSYS) {
> + if (!--tries) {
> + DRM_ERROR("Possible deadlock? Retry too many times\n");
> + return -EDEADLK;
> + }
> + goto restart;
> + }
> +
> return r;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index f4f00217546e..92025993bfb1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -336,26 +336,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>
> r = amdgpu_bo_reserve(bo, true);
> if (r)
> - goto free_pages;
> + goto user_pages_done;
>
> amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
> r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> amdgpu_bo_unreserve(bo);
> if (r)
> - goto free_pages;
> + goto user_pages_done;
> }
>
> r = drm_gem_handle_create(filp, gobj, &handle);
> - /* drop reference from allocate - handle holds it now */
> - drm_gem_object_put_unlocked(gobj);
> if (r)
> - return r;
> + goto user_pages_done;
>
> args->handle = handle;
> - return 0;
>
> -free_pages:
> - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
> +user_pages_done:
> + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>
> release_object:
> drm_gem_object_put_unlocked(gobj);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 5d518d2bb9be..9d40f3001f3c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
> true, false, MAX_SCHEDULE_TIMEOUT);
> if (r <= 0)
> DRM_ERROR("(%ld) failed to wait for user bo\n", r);
> -
> - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
> }
> }
>
> @@ -513,3 +511,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
> mutex_unlock(&adev->mn_lock);
> }
>
> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */
> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
> + (1 << 0), /* HMM_PFN_VALID */
> + (1 << 1), /* HMM_PFN_WRITE */
> + 0 /* HMM_PFN_DEVICE_PRIVATE */
> +};
> +
> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
> + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
> + 0, /* HMM_PFN_NONE */
> + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
> +};
> +
> +void amdgpu_hmm_init_range(struct hmm_range *range)
> +{
> + if (range) {
> + range->flags = hmm_range_flags;
> + range->values = hmm_range_values;
> + range->pfn_shift = PAGE_SHIFT;
> + range->pfns = NULL;
> + INIT_LIST_HEAD(&range->list);
> + }
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index 0a51fd00021c..4803e216e174 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -25,9 +25,10 @@
> #define __AMDGPU_MN_H__
>
> /*
> - * MMU Notifier
> + * HMM mirror
> */
> struct amdgpu_mn;
> +struct hmm_range;
>
> enum amdgpu_mn_type {
> AMDGPU_MN_TYPE_GFX,
> @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> enum amdgpu_mn_type type);
> int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
> void amdgpu_mn_unregister(struct amdgpu_bo *bo);
> +void amdgpu_hmm_init_range(struct hmm_range *range);
> #else
> static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
> static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c91ec3101d00..3d074f4b02f4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -43,6 +43,7 @@
> #include <linux/pagemap.h>
> #include <linux/debugfs.h>
> #include <linux/iommu.h>
> +#include <linux/hmm.h>
> #include "amdgpu.h"
> #include "amdgpu_object.h"
> #include "amdgpu_trace.h"
> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
> /*
> * TTM backend functions.
> */
> -struct amdgpu_ttm_gup_task_list {
> - struct list_head list;
> - struct task_struct *task;
> -};
> -
> struct amdgpu_ttm_tt {
> struct ttm_dma_tt ttm;
> u64 offset;
> @@ -718,85 +714,96 @@ struct amdgpu_ttm_tt {
> uint32_t userflags;
> spinlock_t guptasklock;
> struct list_head guptasks;
> - atomic_t mmu_invalidations;
> - uint32_t last_set_pages;
> + struct hmm_range range;
> };
>
> /**
> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
> - * pointer to memory
> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
> + * memory and start HMM tracking CPU page table update
> *
> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
> - * This provides a wrapper around the get_user_pages() call to provide
> - * device accessible pages that back user memory.
> + * Calling function must call amdgpu_ttm_tt_get_user_pages_done() once and
> + * only once afterwards to stop HMM tracking
> */
> int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
> {
> struct amdgpu_ttm_tt *gtt = (void *)ttm;
> struct mm_struct *mm = gtt->usertask->mm;
> - unsigned int flags = 0;
> - unsigned pinned = 0;
> - int r;
> + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> + struct hmm_range *range = &gtt->range;
> + int r = 0, i;
>
> if (!mm) /* Happens during process shutdown */
> return -ESRCH;
>
> - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
> - flags |= FOLL_WRITE;
> + amdgpu_hmm_init_range(range);
>
> down_read(&mm->mmap_sem);
>
> - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
> - /*
> - * check that we only use anonymous memory to prevent problems
> - * with writeback
> - */
> - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
> - struct vm_area_struct *vma;
> + range->vma = find_vma(mm, gtt->userptr);
> + if (!range_in_vma(range->vma, gtt->userptr, end))
> + r = -EFAULT;
> + else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
> + range->vma->vm_file)
> + r = -EPERM;
> + if (r)
> + goto out;
>
> - vma = find_vma(mm, gtt->userptr);
> - if (!vma || vma->vm_file || vma->vm_end < end) {
> - up_read(&mm->mmap_sem);
> - return -EPERM;
> - }
> + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
> + GFP_KERNEL);
> + if (range->pfns == NULL) {
> + r = -ENOMEM;
> + goto out;
> }
> + range->start = gtt->userptr;
> + range->end = end;
>
> - /* loop enough times using contiguous pages of memory */
> - do {
> - unsigned num_pages = ttm->num_pages - pinned;
> - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
> - struct page **p = pages + pinned;
> - struct amdgpu_ttm_gup_task_list guptask;
> + range->pfns[0] = range->flags[HMM_PFN_VALID];
> + range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
> + 0 : range->flags[HMM_PFN_WRITE];
> + for (i = 1; i < ttm->num_pages; i++)
> + range->pfns[i] = range->pfns[0];
>
> - guptask.task = current;
> - spin_lock(&gtt->guptasklock);
> - list_add(&guptask.list, &gtt->guptasks);
> - spin_unlock(&gtt->guptasklock);
> + /* This may trigger page table update */
> + r = hmm_vma_fault(range, true);
> + if (r)
> + goto out_free_pfns;
>
> - if (mm == current->mm)
> - r = get_user_pages(userptr, num_pages, flags, p, NULL);
> - else
> - r = get_user_pages_remote(gtt->usertask,
> - mm, userptr, num_pages,
> - flags, p, NULL, NULL);
> + up_read(&mm->mmap_sem);
>
> - spin_lock(&gtt->guptasklock);
> - list_del(&guptask.list);
> - spin_unlock(&gtt->guptasklock);
> + for (i = 0; i < ttm->num_pages; i++)
> + pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
>
> - if (r < 0)
> - goto release_pages;
> + return 0;
>
> - pinned += r;
> +out_free_pfns:
> + kvfree(range->pfns);
> + range->pfns = NULL;
> +out:
> + up_read(&mm->mmap_sem);
> + return r;
> +}
>
> - } while (pinned < ttm->num_pages);
> +/**
> + * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table changes
> + * Check if the pages backing this ttm range have been invalidated
> + *
> + * Returns: true if pages are still valid
> + */
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
> +{
> + struct amdgpu_ttm_tt *gtt = (void *)ttm;
> + bool r = false;
>
> - up_read(&mm->mmap_sem);
> - return 0;
> + if (!gtt || !gtt->userptr)
> + return false;
> +
> + WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");
> + if (gtt->range.pfns) {
> + r = hmm_vma_range_done(&gtt->range);
> + kvfree(gtt->range.pfns);
> + gtt->range.pfns = NULL;
> + }
>
> -release_pages:
> - release_pages(pages, pinned);
> - up_read(&mm->mmap_sem);
> return r;
> }
>
> @@ -809,16 +816,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
> */
> void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
> {
> - struct amdgpu_ttm_tt *gtt = (void *)ttm;
> unsigned i;
>
> - gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
> - for (i = 0; i < ttm->num_pages; ++i) {
> - if (ttm->pages[i])
> - put_page(ttm->pages[i]);
> -
> + for (i = 0; i < ttm->num_pages; ++i)
> ttm->pages[i] = pages ? pages[i] : NULL;
> - }
> }
>
> /**
> @@ -903,10 +904,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
> /* unmap the pages mapped to the device */
> dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
>
> - /* mark the pages as dirty */
> - amdgpu_ttm_tt_mark_user_pages(ttm);
> -
> sg_free_table(ttm->sg);
> +
> + if (gtt->range.pfns &&
> + ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0]))
> + WARN_ONCE(1, "Missing get_user_page_done\n");
> }
>
> int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
> @@ -1258,8 +1260,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>
> spin_lock_init(&gtt->guptasklock);
> INIT_LIST_HEAD(&gtt->guptasks);
> - atomic_set(&gtt->mmu_invalidations, 0);
> - gtt->last_set_pages = 0;
>
> return 0;
> }
> @@ -1289,7 +1289,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
> unsigned long end)
> {
> struct amdgpu_ttm_tt *gtt = (void *)ttm;
> - struct amdgpu_ttm_gup_task_list *entry;
> unsigned long size;
>
> if (gtt == NULL || !gtt->userptr)
> @@ -1302,48 +1301,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
> if (gtt->userptr > end || gtt->userptr + size <= start)
> return false;
>
> - /* Search the lists of tasks that hold this mapping and see
> - * if current is one of them. If it is return false.
> - */
> - spin_lock(&gtt->guptasklock);
> - list_for_each_entry(entry, &gtt->guptasks, list) {
> - if (entry->task == current) {
> - spin_unlock(&gtt->guptasklock);
> - return false;
> - }
> - }
> - spin_unlock(&gtt->guptasklock);
> -
> - atomic_inc(&gtt->mmu_invalidations);
> -
> return true;
> }
>
> /**
> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
> - */
> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
> - int *last_invalidated)
> -{
> - struct amdgpu_ttm_tt *gtt = (void *)ttm;
> - int prev_invalidated = *last_invalidated;
> -
> - *last_invalidated = atomic_read(&gtt->mmu_invalidations);
> - return prev_invalidated != *last_invalidated;
> -}
> -
> -/**
> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
> - * been invalidated since the last time they've been set?
> + * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr?
> */
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
> {
> struct amdgpu_ttm_tt *gtt = (void *)ttm;
>
> if (gtt == NULL || !gtt->userptr)
> return false;
>
> - return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
> + return true;
> }
>
> /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> index b5b2d101f7db..8988c87fff9d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
> int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>
> int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
> void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
> void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
> int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
> unsigned long end);
> bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
> int *last_invalidated);
> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
> bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
> uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
> uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
[not found] ` <ee46d8e7-cf64-eb8c-cfd6-793db28c6c69-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-04 18:09 ` Christian König
0 siblings, 0 replies; 11+ messages in thread
From: Christian König @ 2019-02-04 18:09 UTC (permalink / raw)
To: Yang, Philip, Koenig, Christian,
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 04.02.19 um 18:17 schrieb Yang, Philip:
>
> On 2019-02-04 10:18 a.m., Christian König wrote:
>> Am 04.02.19 um 16:06 schrieb Yang, Philip:
>>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>>> DRM_AMDGPU_USERPTR Kconfig.
>>>
>>> It supports both KFD userptr and gfx userptr paths.
>>>
>>> The dependent HMM patches from Jérôme Glisse are all merged into the
>>> 4.20.0 kernel now.
>>>
>>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
>>> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 139 +++++++++++--------------
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
>>> 4 files changed, 67 insertions(+), 82 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> index 9221e5489069..960a63355705 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>>> config DRM_AMDGPU_USERPTR
>>> bool "Always enable userptr write support"
>>> depends on DRM_AMDGPU
>>> - select MMU_NOTIFIER
>>> + select HMM_MIRROR
>>> help
>>> - This option selects CONFIG_MMU_NOTIFIER if it isn't already
>>> - selected to enabled full userptr support.
>>> + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>>> + isn't already selected to enable full userptr support.
>>> config DRM_AMDGPU_GART_DEBUGFS
>>> bool "Allow GART access through debugfs"
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> index 466da5954a68..851001ced5e8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>>> @@ -172,7 +172,7 @@ endif
>>> amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>>> amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>>> amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>>> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>>> include $(FULL_AMD_PATH)/powerplay/Makefile
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> index 3e6823fdd939..5d518d2bb9be 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> @@ -45,7 +45,7 @@
>>> #include <linux/firmware.h>
>>> #include <linux/module.h>
>>> -#include <linux/mmu_notifier.h>
>>> +#include <linux/hmm.h>
>>> #include <linux/interval_tree.h>
>>> #include <drm/drmP.h>
>>> #include <drm/drm.h>
>>> @@ -58,7 +58,6 @@
>>> *
>>> * @adev: amdgpu device pointer
>>> * @mm: process address space
>>> - * @mn: MMU notifier structure
>>> * @type: type of MMU notifier
>>> * @work: destruction work item
>>> * @node: hash table node to find structure by adev and mn
>>> @@ -66,6 +65,7 @@
>>> * @objects: interval tree containing amdgpu_mn_nodes
>>> * @read_lock: mutex for recursive locking of @lock
>>> * @recursion: depth of recursion
>>> + * @mirror: HMM mirror function support
>>> *
>>> * Data for each amdgpu device and process address space.
>>> */
>>> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>>> /* constant after initialisation */
>>> struct amdgpu_device *adev;
>>> struct mm_struct *mm;
>>> - struct mmu_notifier mn;
>>> enum amdgpu_mn_type type;
>>> /* only used on destruction */
>>> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>>> struct rb_root_cached objects;
>>> struct mutex read_lock;
>>> atomic_t recursion;
>> With HMM we don't need this any more. Please remove it and simplify
>> amdgpu_mn_read_lock() and amdgpu_mn_read_unlock().
>>
> Thanks, this makes sense because HMM uses hmm->mirror_sem to serialize
> invalidate range.
>
> amn->read_lock is also not needed anymore, this was used to protect
> atomic operations for atomic_inc_return(amn->recursion) and
> down_read(amn->lock).
>
> Just one amn->lock is needed to sync amdgpu_cs_submit and userptr
> update. I will submit another patch.
Sounds good. I will take a closer look at the other patches tomorrow.
Thanks,
Christian.
>
> Philip
>
>> Apart from that looks good to me,
>> Christian.
>>
>>> +
>>> + /* HMM mirror */
>>> + struct hmm_mirror mirror;
>>> };
>>> /**
>>> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {
>>> };
>>> /**
>>> - * amdgpu_mn_destroy - destroy the MMU notifier
>>> + * amdgpu_mn_destroy - destroy the HMM mirror
>>> *
>>> * @work: previously sheduled work item
>>> *
>>> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct
>>> *work)
>>> }
>>> up_write(&amn->lock);
>>> mutex_unlock(&adev->mn_lock);
>>> - mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>>> +
>>> + hmm_mirror_unregister(&amn->mirror);
>>> kfree(amn);
>>> }
>>> /**
>>> - * amdgpu_mn_release - callback to notify about mm destruction
>>> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>>> *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> + * @mirror: the HMM mirror (mm) this callback is about
>>> *
>>> - * Shedule a work item to lazy destroy our notifier.
>>> + * Schedule a work item to lazily destroy the HMM mirror.
>>> */
>>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>>> - struct mm_struct *mm)
>>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>>> {
>>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>>> mirror);
>>> INIT_WORK(&amn->work, amdgpu_mn_destroy);
>>> schedule_work(&amn->work);
>>> }
>>> -
>>> /**
>>> * amdgpu_mn_lock - take the write side lock for this notifier
>>> *
>>> @@ -237,141 +237,126 @@ static void amdgpu_mn_invalidate_node(struct
>>> amdgpu_mn_node *node,
>>> /**
>>> * amdgpu_mn_invalidate_range_start_gfx - callback to notify about
>>> mm change
>>> *
>>> - * @mn: our notifier
>>> - * @range: mmu notifier context
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>> *
>>> * Block for operations on BOs to finish and mark pages as accessed and
>>> * potentially dirty.
>>> */
>>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>>> - const struct mmu_notifier_range *range)
>>> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
>>> + const struct hmm_update *update)
>>> {
>>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>>> mirror);
>>> + unsigned long start = update->start;
>>> + unsigned long end = update->end;
>>> + bool blockable = update->blockable;
>>> struct interval_tree_node *it;
>>> - unsigned long end;
>>> /* notification is exclusive, but interval is inclusive */
>>> - end = range->end - 1;
>>> + end -= 1;
>>> /* TODO we should be able to split locking for interval tree and
>>> * amdgpu_mn_invalidate_node
>>> */
>>> - if (amdgpu_mn_read_lock(amn, range->blockable))
>>> + if (amdgpu_mn_read_lock(amn, blockable))
>>> return -EAGAIN;
>>> - it = interval_tree_iter_first(&amn->objects, range->start, end);
>>> + it = interval_tree_iter_first(&amn->objects, start, end);
>>> while (it) {
>>> struct amdgpu_mn_node *node;
>>> - if (!range->blockable) {
>>> + if (!blockable) {
>>> amdgpu_mn_read_unlock(amn);
>>> return -EAGAIN;
>>> }
>>> node = container_of(it, struct amdgpu_mn_node, it);
>>> - it = interval_tree_iter_next(it, range->start, end);
>>> + it = interval_tree_iter_next(it, start, end);
>>> - amdgpu_mn_invalidate_node(node, range->start, end);
>>> + amdgpu_mn_invalidate_node(node, start, end);
>>> }
>>> + amdgpu_mn_read_unlock(amn);
>>> +
>>> return 0;
>>> }
>>> /**
>>> * amdgpu_mn_invalidate_range_start_hsa - callback to notify about
>>> mm change
>>> *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> + * @mirror: the hmm_mirror (mm) is about to update
>>> + * @update: the update start, end address
>>> *
>>> * We temporarily evict all BOs between start and end. This
>>> * necessitates evicting all user-mode queues of the process. The BOs
>>> * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>>> */
>>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>>> - const struct mmu_notifier_range *range)
>>> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
>>> + const struct hmm_update *update)
>>> {
>>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>>> mirror);
>>> + unsigned long start = update->start;
>>> + unsigned long end = update->end;
>>> + bool blockable = update->blockable;
>>> struct interval_tree_node *it;
>>> - unsigned long end;
>>> /* notification is exclusive, but interval is inclusive */
>>> - end = range->end - 1;
>>> + end -= 1;
>>> - if (amdgpu_mn_read_lock(amn, range->blockable))
>>> + if (amdgpu_mn_read_lock(amn, blockable))
>>> return -EAGAIN;
>>> - it = interval_tree_iter_first(&amn->objects, range->start, end);
>>> + it = interval_tree_iter_first(&amn->objects, start, end);
>>> while (it) {
>>> struct amdgpu_mn_node *node;
>>> struct amdgpu_bo *bo;
>>> - if (!range->blockable) {
>>> + if (!blockable) {
>>> amdgpu_mn_read_unlock(amn);
>>> return -EAGAIN;
>>> }
>>> node = container_of(it, struct amdgpu_mn_node, it);
>>> - it = interval_tree_iter_next(it, range->start, end);
>>> + it = interval_tree_iter_next(it, start, end);
>>> list_for_each_entry(bo, &node->bos, mn_list) {
>>> struct kgd_mem *mem = bo->kfd_bo;
>>> if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>>> - range->start,
>>> - end))
>>> - amdgpu_amdkfd_evict_userptr(mem, range->mm);
>>> + start, end))
>>> + amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>>> }
>>> }
>>> + amdgpu_mn_read_unlock(amn);
>>> +
>>> return 0;
>>> }
>>> -/**
>>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>>> - *
>>> - * @mn: our notifier
>>> - * @mm: the mm this callback is about
>>> - * @start: start of updated range
>>> - * @end: end of updated range
>>> - *
>>> - * Release the lock again to allow new command submissions.
>>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>>> + * alignment. Use these bits to make a unique key from the mm pointer
>>> + * and notifier type.
>>> */
>>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>>> - const struct mmu_notifier_range *range)
>>> -{
>>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>>> -
>>> - amdgpu_mn_read_unlock(amn);
>>> -}
>>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>>> [AMDGPU_MN_TYPE_GFX] = {
>>> - .release = amdgpu_mn_release,
>>> - .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>>> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
>>> + .release = amdgpu_hmm_mirror_release
>>> },
>>> [AMDGPU_MN_TYPE_HSA] = {
>>> - .release = amdgpu_mn_release,
>>> - .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>>> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>>> + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
>>> + .release = amdgpu_hmm_mirror_release
>>> },
>>> };
>>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>>> - * alignment. Use these bits to make a unique key from the mm pointer
>>> - * and notifier type.
>>> - */
>>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>> -
>>> /**
>>> - * amdgpu_mn_get - create notifier context
>>> + * amdgpu_mn_get - create HMM mirror context
>>> *
>>> * @adev: amdgpu device pointer
>>> * @type: type of MMU notifier context
>>> *
>>> - * Creates a notifier context for current->mm.
>>> + * Creates a HMM mirror context for current->mm.
>>> */
>>> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>> enum amdgpu_mn_type type)
>>> @@ -401,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>>> amdgpu_device *adev,
>>> amn->mm = mm;
>>> init_rwsem(&amn->lock);
>>> amn->type = type;
>>> - amn->mn.ops = &amdgpu_mn_ops[type];
>>> amn->objects = RB_ROOT_CACHED;
>>> mutex_init(&amn->read_lock);
>>> atomic_set(&amn->recursion, 0);
>>> - r = __mmu_notifier_register(&amn->mn, mm);
>>> + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>>> + r = hmm_mirror_register(&amn->mirror, mm);
>>> if (r)
>>> goto free_amn;
>>> @@ -432,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>>> amdgpu_device *adev,
>>> * @bo: amdgpu buffer object
>>> * @addr: userptr addr we should monitor
>>> *
>>> - * Registers an MMU notifier for the given BO at the specified address.
>>> + * Registers an HMM mirror for the given BO at the specified address.
>>> * Returns 0 on success, -ERRNO if anything goes wrong.
>>> */
>>> int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>>> @@ -488,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo,
>>> unsigned long addr)
>>> }
>>> /**
>>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>>> *
>>> * @bo: amdgpu buffer object
>>> *
>>> - * Remove any registration of MMU notifier updates from the buffer
>>> object.
>>> + * Remove any registration of HMM mirror updates from the buffer object.
>>> */
>>> void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>> {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> index eb0f432f78fe..0a51fd00021c 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>>> AMDGPU_MN_TYPE_HSA,
>>> };
>>> -#if defined(CONFIG_MMU_NOTIFIER)
>>> +#if defined(CONFIG_HMM_MIRROR)
>>> void amdgpu_mn_lock(struct amdgpu_mn *mn);
>>> void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>>> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
[not found] ` <71e6094c-a257-7aa3-854c-9a9dff163b01-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-02-04 17:17 ` Yang, Philip
[not found] ` <ee46d8e7-cf64-eb8c-cfd6-793db28c6c69-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 11+ messages in thread
From: Yang, Philip @ 2019-02-04 17:17 UTC (permalink / raw)
To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On 2019-02-04 10:18 a.m., Christian König wrote:
> Am 04.02.19 um 16:06 schrieb Yang, Philip:
>> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
>> DRM_AMDGPU_USERPTR Kconfig.
>>
>> It supports both KFD userptr and gfx userptr paths.
>>
>> The dependent HMM patches from Jérôme Glisse are all merged into the
>> 4.20.0 kernel now.
>>
>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
>> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 139 +++++++++++--------------
>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
>> 4 files changed, 67 insertions(+), 82 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 9221e5489069..960a63355705 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
>> config DRM_AMDGPU_USERPTR
>> bool "Always enable userptr write support"
>> depends on DRM_AMDGPU
>> - select MMU_NOTIFIER
>> + select HMM_MIRROR
>> help
>> - This option selects CONFIG_MMU_NOTIFIER if it isn't already
>> - selected to enabled full userptr support.
>> + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>> + isn't already selected to enable full userptr support.
>> config DRM_AMDGPU_GART_DEBUGFS
>> bool "Allow GART access through debugfs"
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 466da5954a68..851001ced5e8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -172,7 +172,7 @@ endif
>> amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>> amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>> amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>> include $(FULL_AMD_PATH)/powerplay/Makefile
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index 3e6823fdd939..5d518d2bb9be 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -45,7 +45,7 @@
>> #include <linux/firmware.h>
>> #include <linux/module.h>
>> -#include <linux/mmu_notifier.h>
>> +#include <linux/hmm.h>
>> #include <linux/interval_tree.h>
>> #include <drm/drmP.h>
>> #include <drm/drm.h>
>> @@ -58,7 +58,6 @@
>> *
>> * @adev: amdgpu device pointer
>> * @mm: process address space
>> - * @mn: MMU notifier structure
>> * @type: type of MMU notifier
>> * @work: destruction work item
>> * @node: hash table node to find structure by adev and mn
>> @@ -66,6 +65,7 @@
>> * @objects: interval tree containing amdgpu_mn_nodes
>> * @read_lock: mutex for recursive locking of @lock
>> * @recursion: depth of recursion
>> + * @mirror: HMM mirror function support
>> *
>> * Data for each amdgpu device and process address space.
>> */
>> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>> /* constant after initialisation */
>> struct amdgpu_device *adev;
>> struct mm_struct *mm;
>> - struct mmu_notifier mn;
>> enum amdgpu_mn_type type;
>> /* only used on destruction */
>> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>> struct rb_root_cached objects;
>> struct mutex read_lock;
>
>> atomic_t recursion;
>
> With HMM we don't need this any more. Please remove it and simplify
> amdgpu_mn_read_lock() and amdgpu_mn_read_unlock().
>
Thanks, this makes sense because HMM uses hmm->mirror_sem to serialize
invalidate range.
amn->read_lock is also not needed anymore, this was used to protect
atomic operations for atomic_inc_return(amn->recursion) and
down_read(amn->lock).
Just one amn->lock is needed to sync amdgpu_cs_submit and userptr
update. I will submit another patch.
Philip
> Apart from that looks good to me,
> Christian.
>
>> +
>> + /* HMM mirror */
>> + struct hmm_mirror mirror;
>> };
>> /**
>> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {
>> };
>> /**
>> - * amdgpu_mn_destroy - destroy the MMU notifier
>> + * amdgpu_mn_destroy - destroy the HMM mirror
>> *
>> * @work: previously sheduled work item
>> *
>> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct
>> *work)
>> }
>> up_write(&amn->lock);
>> mutex_unlock(&adev->mn_lock);
>> - mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>> +
>> + hmm_mirror_unregister(&amn->mirror);
>> kfree(amn);
>> }
>> /**
>> - * amdgpu_mn_release - callback to notify about mm destruction
>> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>> *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> + * @mirror: the HMM mirror (mm) this callback is about
>> *
>> - * Shedule a work item to lazy destroy our notifier.
>> + * Schedule a work item to lazily destroy the HMM mirror.
>> */
>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>> - struct mm_struct *mm)
>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>> {
>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>> mirror);
>> INIT_WORK(&amn->work, amdgpu_mn_destroy);
>> schedule_work(&amn->work);
>> }
>> -
>> /**
>> * amdgpu_mn_lock - take the write side lock for this notifier
>> *
>> @@ -237,141 +237,126 @@ static void amdgpu_mn_invalidate_node(struct
>> amdgpu_mn_node *node,
>> /**
>> * amdgpu_mn_invalidate_range_start_gfx - callback to notify about
>> mm change
>> *
>> - * @mn: our notifier
>> - * @range: mmu notifier context
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>> *
>> * Block for operations on BOs to finish and mark pages as accessed and
>> * potentially dirty.
>> */
>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>> - const struct mmu_notifier_range *range)
>> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
>> + const struct hmm_update *update)
>> {
>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>> mirror);
>> + unsigned long start = update->start;
>> + unsigned long end = update->end;
>> + bool blockable = update->blockable;
>> struct interval_tree_node *it;
>> - unsigned long end;
>> /* notification is exclusive, but interval is inclusive */
>> - end = range->end - 1;
>> + end -= 1;
>> /* TODO we should be able to split locking for interval tree and
>> * amdgpu_mn_invalidate_node
>> */
>> - if (amdgpu_mn_read_lock(amn, range->blockable))
>> + if (amdgpu_mn_read_lock(amn, blockable))
>> return -EAGAIN;
>> - it = interval_tree_iter_first(&amn->objects, range->start, end);
>> + it = interval_tree_iter_first(&amn->objects, start, end);
>> while (it) {
>> struct amdgpu_mn_node *node;
>> - if (!range->blockable) {
>> + if (!blockable) {
>> amdgpu_mn_read_unlock(amn);
>> return -EAGAIN;
>> }
>> node = container_of(it, struct amdgpu_mn_node, it);
>> - it = interval_tree_iter_next(it, range->start, end);
>> + it = interval_tree_iter_next(it, start, end);
>> - amdgpu_mn_invalidate_node(node, range->start, end);
>> + amdgpu_mn_invalidate_node(node, start, end);
>> }
>> + amdgpu_mn_read_unlock(amn);
>> +
>> return 0;
>> }
>> /**
>> * amdgpu_mn_invalidate_range_start_hsa - callback to notify about
>> mm change
>> *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>> *
>> * We temporarily evict all BOs between start and end. This
>> * necessitates evicting all user-mode queues of the process. The BOs
>> * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>> */
>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
>> - const struct mmu_notifier_range *range)
>> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
>> + const struct hmm_update *update)
>> {
>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>> mirror);
>> + unsigned long start = update->start;
>> + unsigned long end = update->end;
>> + bool blockable = update->blockable;
>> struct interval_tree_node *it;
>> - unsigned long end;
>> /* notification is exclusive, but interval is inclusive */
>> - end = range->end - 1;
>> + end -= 1;
>> - if (amdgpu_mn_read_lock(amn, range->blockable))
>> + if (amdgpu_mn_read_lock(amn, blockable))
>> return -EAGAIN;
>> - it = interval_tree_iter_first(&amn->objects, range->start, end);
>> + it = interval_tree_iter_first(&amn->objects, start, end);
>> while (it) {
>> struct amdgpu_mn_node *node;
>> struct amdgpu_bo *bo;
>> - if (!range->blockable) {
>> + if (!blockable) {
>> amdgpu_mn_read_unlock(amn);
>> return -EAGAIN;
>> }
>> node = container_of(it, struct amdgpu_mn_node, it);
>> - it = interval_tree_iter_next(it, range->start, end);
>> + it = interval_tree_iter_next(it, start, end);
>> list_for_each_entry(bo, &node->bos, mn_list) {
>> struct kgd_mem *mem = bo->kfd_bo;
>> if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>> - range->start,
>> - end))
>> - amdgpu_amdkfd_evict_userptr(mem, range->mm);
>> + start, end))
>> + amdgpu_amdkfd_evict_userptr(mem, amn->mm);
>> }
>> }
>> + amdgpu_mn_read_unlock(amn);
>> +
>> return 0;
>> }
>> -/**
>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>> - *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> - *
>> - * Release the lock again to allow new command submissions.
>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>> + * alignment. Use these bits to make a unique key from the mm pointer
>> + * and notifier type.
>> */
>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>> - const struct mmu_notifier_range *range)
>> -{
>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
>> -
>> - amdgpu_mn_read_unlock(amn);
>> -}
>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>> [AMDGPU_MN_TYPE_GFX] = {
>> - .release = amdgpu_mn_release,
>> - .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
>> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
>> + .release = amdgpu_hmm_mirror_release
>> },
>> [AMDGPU_MN_TYPE_HSA] = {
>> - .release = amdgpu_mn_release,
>> - .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
>> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
>> + .release = amdgpu_hmm_mirror_release
>> },
>> };
>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>> - * alignment. Use these bits to make a unique key from the mm pointer
>> - * and notifier type.
>> - */
>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -
>> /**
>> - * amdgpu_mn_get - create notifier context
>> + * amdgpu_mn_get - create HMM mirror context
>> *
>> * @adev: amdgpu device pointer
>> * @type: type of MMU notifier context
>> *
>> - * Creates a notifier context for current->mm.
>> + * Creates a HMM mirror context for current->mm.
>> */
>> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> enum amdgpu_mn_type type)
>> @@ -401,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>> amdgpu_device *adev,
>> amn->mm = mm;
>> init_rwsem(&amn->lock);
>> amn->type = type;
>> - amn->mn.ops = &amdgpu_mn_ops[type];
>> amn->objects = RB_ROOT_CACHED;
>> mutex_init(&amn->read_lock);
>> atomic_set(&amn->recursion, 0);
>> - r = __mmu_notifier_register(&amn->mn, mm);
>> + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>> + r = hmm_mirror_register(&amn->mirror, mm);
>> if (r)
>> goto free_amn;
>> @@ -432,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>> amdgpu_device *adev,
>> * @bo: amdgpu buffer object
>> * @addr: userptr addr we should monitor
>> *
>> - * Registers an MMU notifier for the given BO at the specified address.
>> + * Registers an HMM mirror for the given BO at the specified address.
>> * Returns 0 on success, -ERRNO if anything goes wrong.
>> */
>> int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
>> @@ -488,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo,
>> unsigned long addr)
>> }
>> /**
>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>> *
>> * @bo: amdgpu buffer object
>> *
>> - * Remove any registration of MMU notifier updates from the buffer
>> object.
>> + * Remove any registration of HMM mirror updates from the buffer object.
>> */
>> void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>> {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index eb0f432f78fe..0a51fd00021c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>> AMDGPU_MN_TYPE_HSA,
>> };
>> -#if defined(CONFIG_MMU_NOTIFIER)
>> +#if defined(CONFIG_HMM_MIRROR)
>> void amdgpu_mn_lock(struct amdgpu_mn *mn);
>> void amdgpu_mn_unlock(struct amdgpu_mn *mn);
>> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
[not found] ` <20190204150613.5837-2-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-04 15:18 ` Christian König
[not found] ` <71e6094c-a257-7aa3-854c-9a9dff163b01-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
0 siblings, 1 reply; 11+ messages in thread
From: Christian König @ 2019-02-04 15:18 UTC (permalink / raw)
To: Yang, Philip, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 04.02.19 um 16:06 schrieb Yang, Philip:
> Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
> DRM_AMDGPU_USERPTR Kconfig.
>
> It supports both KFD userptr and gfx userptr paths.
>
> The depdent HMM patchset from Jérôme Glisse are all merged into 4.20.0
> kernel now.
>
> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 139 +++++++++++--------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
> 4 files changed, 67 insertions(+), 82 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
> index 9221e5489069..960a63355705 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
> config DRM_AMDGPU_USERPTR
> bool "Always enable userptr write support"
> depends on DRM_AMDGPU
> - select MMU_NOTIFIER
> + select HMM_MIRROR
> help
> - This option selects CONFIG_MMU_NOTIFIER if it isn't already
> - selected to enabled full userptr support.
> + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
> + isn't already selected to enabled full userptr support.
>
> config DRM_AMDGPU_GART_DEBUGFS
> bool "Allow GART access through debugfs"
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 466da5954a68..851001ced5e8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -172,7 +172,7 @@ endif
> amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
> amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
> amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>
> include $(FULL_AMD_PATH)/powerplay/Makefile
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index 3e6823fdd939..5d518d2bb9be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -45,7 +45,7 @@
>
> #include <linux/firmware.h>
> #include <linux/module.h>
> -#include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
> #include <linux/interval_tree.h>
> #include <drm/drmP.h>
> #include <drm/drm.h>
> @@ -58,7 +58,6 @@
> *
> * @adev: amdgpu device pointer
> * @mm: process address space
> - * @mn: MMU notifier structure
> * @type: type of MMU notifier
> * @work: destruction work item
> * @node: hash table node to find structure by adev and mn
> @@ -66,6 +65,7 @@
> * @objects: interval tree containing amdgpu_mn_nodes
> * @read_lock: mutex for recursive locking of @lock
> * @recursion: depth of recursion
> + * @mirror: HMM mirror function support
> *
> * Data for each amdgpu device and process address space.
> */
> @@ -73,7 +73,6 @@ struct amdgpu_mn {
> /* constant after initialisation */
> struct amdgpu_device *adev;
> struct mm_struct *mm;
> - struct mmu_notifier mn;
> enum amdgpu_mn_type type;
>
> /* only used on destruction */
> @@ -87,6 +86,9 @@ struct amdgpu_mn {
> struct rb_root_cached objects;
> struct mutex read_lock;
> atomic_t recursion;
With HMM we don't need this any more. Please remove it and simplify
amdgpu_mn_read_lock() and amdgpu_mn_read_unlock().
Apart from that looks good to me,
Christian.
> +
> + /* HMM mirror */
> + struct hmm_mirror mirror;
> };
>
> /**
> @@ -103,7 +105,7 @@ struct amdgpu_mn_node {
> };
>
> /**
> - * amdgpu_mn_destroy - destroy the MMU notifier
> + * amdgpu_mn_destroy - destroy the HMM mirror
> *
> * @work: previously sheduled work item
> *
> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
> }
> up_write(&amn->lock);
> mutex_unlock(&adev->mn_lock);
> - mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
> +
> + hmm_mirror_unregister(&amn->mirror);
> kfree(amn);
> }
>
> /**
> - * amdgpu_mn_release - callback to notify about mm destruction
> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
> *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> + * @mirror: the HMM mirror (mm) this callback is about
> *
> - * Shedule a work item to lazy destroy our notifier.
> + * Shedule a work item to lazy destroy HMM mirror.
> */
> -static void amdgpu_mn_release(struct mmu_notifier *mn,
> - struct mm_struct *mm)
> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
> {
> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
>
> INIT_WORK(&amn->work, amdgpu_mn_destroy);
> schedule_work(&amn->work);
> }
>
> -
> /**
> * amdgpu_mn_lock - take the write side lock for this notifier
> *
> @@ -237,141 +237,126 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
> /**
> * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
> *
> - * @mn: our notifier
> - * @range: mmu notifier context
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
> *
> * Block for operations on BOs to finish and mark pages as accessed and
> * potentially dirty.
> */
> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
> - const struct mmu_notifier_range *range)
> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
> + const struct hmm_update *update)
> {
> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> + unsigned long start = update->start;
> + unsigned long end = update->end;
> + bool blockable = update->blockable;
> struct interval_tree_node *it;
> - unsigned long end;
>
> /* notification is exclusive, but interval is inclusive */
> - end = range->end - 1;
> + end -= 1;
>
> /* TODO we should be able to split locking for interval tree and
> * amdgpu_mn_invalidate_node
> */
> - if (amdgpu_mn_read_lock(amn, range->blockable))
> + if (amdgpu_mn_read_lock(amn, blockable))
> return -EAGAIN;
>
> - it = interval_tree_iter_first(&amn->objects, range->start, end);
> + it = interval_tree_iter_first(&amn->objects, start, end);
> while (it) {
> struct amdgpu_mn_node *node;
>
> - if (!range->blockable) {
> + if (!blockable) {
> amdgpu_mn_read_unlock(amn);
> return -EAGAIN;
> }
>
> node = container_of(it, struct amdgpu_mn_node, it);
> - it = interval_tree_iter_next(it, range->start, end);
> + it = interval_tree_iter_next(it, start, end);
>
> - amdgpu_mn_invalidate_node(node, range->start, end);
> + amdgpu_mn_invalidate_node(node, start, end);
> }
>
> + amdgpu_mn_read_unlock(amn);
> +
> return 0;
> }
>
> /**
> * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
> *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
> *
> * We temporarily evict all BOs between start and end. This
> * necessitates evicting all user-mode queues of the process. The BOs
> * are restorted in amdgpu_mn_invalidate_range_end_hsa.
> */
> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
> - const struct mmu_notifier_range *range)
> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
> + const struct hmm_update *update)
> {
> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
> + unsigned long start = update->start;
> + unsigned long end = update->end;
> + bool blockable = update->blockable;
> struct interval_tree_node *it;
> - unsigned long end;
>
> /* notification is exclusive, but interval is inclusive */
> - end = range->end - 1;
> + end -= 1;
>
> - if (amdgpu_mn_read_lock(amn, range->blockable))
> + if (amdgpu_mn_read_lock(amn, blockable))
> return -EAGAIN;
>
> - it = interval_tree_iter_first(&amn->objects, range->start, end);
> + it = interval_tree_iter_first(&amn->objects, start, end);
> while (it) {
> struct amdgpu_mn_node *node;
> struct amdgpu_bo *bo;
>
> - if (!range->blockable) {
> + if (!blockable) {
> amdgpu_mn_read_unlock(amn);
> return -EAGAIN;
> }
>
> node = container_of(it, struct amdgpu_mn_node, it);
> - it = interval_tree_iter_next(it, range->start, end);
> + it = interval_tree_iter_next(it, start, end);
>
> list_for_each_entry(bo, &node->bos, mn_list) {
> struct kgd_mem *mem = bo->kfd_bo;
>
> if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
> - range->start,
> - end))
> - amdgpu_amdkfd_evict_userptr(mem, range->mm);
> + start, end))
> + amdgpu_amdkfd_evict_userptr(mem, amn->mm);
> }
> }
>
> + amdgpu_mn_read_unlock(amn);
> +
> return 0;
> }
>
> -/**
> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
> - *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> - *
> - * Release the lock again to allow new command submissions.
> +/* Low bits of any reasonable mm pointer will be unused due to struct
> + * alignment. Use these bits to make a unique key from the mm pointer
> + * and notifier type.
> */
> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
> - const struct mmu_notifier_range *range)
> -{
> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
> -
> - amdgpu_mn_read_unlock(amn);
> -}
> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>
> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
> [AMDGPU_MN_TYPE_GFX] = {
> - .release = amdgpu_mn_release,
> - .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
> + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
> + .release = amdgpu_hmm_mirror_release
> },
> [AMDGPU_MN_TYPE_HSA] = {
> - .release = amdgpu_mn_release,
> - .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
> + .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
> + .release = amdgpu_hmm_mirror_release
> },
> };
>
> -/* Low bits of any reasonable mm pointer will be unused due to struct
> - * alignment. Use these bits to make a unique key from the mm pointer
> - * and notifier type.
> - */
> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
> -
> /**
> - * amdgpu_mn_get - create notifier context
> + * amdgpu_mn_get - create HMM mirror context
> *
> * @adev: amdgpu device pointer
> * @type: type of MMU notifier context
> *
> - * Creates a notifier context for current->mm.
> + * Creates a HMM mirror context for current->mm.
> */
> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> enum amdgpu_mn_type type)
> @@ -401,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> amn->mm = mm;
> init_rwsem(&amn->lock);
> amn->type = type;
> - amn->mn.ops = &amdgpu_mn_ops[type];
> amn->objects = RB_ROOT_CACHED;
> mutex_init(&amn->read_lock);
> atomic_set(&amn->recursion, 0);
>
> - r = __mmu_notifier_register(&amn->mn, mm);
> + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
> + r = hmm_mirror_register(&amn->mirror, mm);
> if (r)
> goto free_amn;
>
> @@ -432,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> * @bo: amdgpu buffer object
> * @addr: userptr addr we should monitor
> *
> - * Registers an MMU notifier for the given BO at the specified address.
> + * Registers an HMM mirror for the given BO at the specified address.
> * Returns 0 on success, -ERRNO if anything goes wrong.
> */
> int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
> @@ -488,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
> }
>
> /**
> - * amdgpu_mn_unregister - unregister a BO for notifier updates
> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
> *
> * @bo: amdgpu buffer object
> *
> - * Remove any registration of MMU notifier updates from the buffer object.
> + * Remove any registration of HMM mirror updates from the buffer object.
> */
> void amdgpu_mn_unregister(struct amdgpu_bo *bo)
> {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index eb0f432f78fe..0a51fd00021c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
> AMDGPU_MN_TYPE_HSA,
> };
>
> -#if defined(CONFIG_MMU_NOTIFIER)
> +#if defined(CONFIG_HMM_MIRROR)
> void amdgpu_mn_lock(struct amdgpu_mn *mn);
> void amdgpu_mn_unlock(struct amdgpu_mn *mn);
> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
[not found] ` <20190204150613.5837-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2019-02-04 15:06 ` Yang, Philip
[not found] ` <20190204150613.5837-2-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 11+ messages in thread
From: Yang, Philip @ 2019-02-04 15:06 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip
Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
DRM_AMDGPU_USERPTR Kconfig.
It supports both KFD userptr and gfx userptr paths.
The dependent HMM patchset from Jérôme Glisse is fully merged into the 4.20.0
kernel now.
Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 139 +++++++++++--------------
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
4 files changed, 67 insertions(+), 82 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9221e5489069..960a63355705 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- select MMU_NOTIFIER
+ select HMM_MIRROR
help
- This option selects CONFIG_MMU_NOTIFIER if it isn't already
- selected to enabled full userptr support.
+ This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+ isn't already selected to enabled full userptr support.
config DRM_AMDGPU_GART_DEBUGFS
bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 466da5954a68..851001ced5e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -172,7 +172,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
include $(FULL_AMD_PATH)/powerplay/Makefile
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 3e6823fdd939..5d518d2bb9be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
#include <linux/interval_tree.h>
#include <drm/drmP.h>
#include <drm/drm.h>
@@ -58,7 +58,6 @@
*
* @adev: amdgpu device pointer
* @mm: process address space
- * @mn: MMU notifier structure
* @type: type of MMU notifier
* @work: destruction work item
* @node: hash table node to find structure by adev and mn
@@ -66,6 +65,7 @@
* @objects: interval tree containing amdgpu_mn_nodes
* @read_lock: mutex for recursive locking of @lock
* @recursion: depth of recursion
+ * @mirror: HMM mirror function support
*
* Data for each amdgpu device and process address space.
*/
@@ -73,7 +73,6 @@ struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
- struct mmu_notifier mn;
enum amdgpu_mn_type type;
/* only used on destruction */
@@ -87,6 +86,9 @@ struct amdgpu_mn {
struct rb_root_cached objects;
struct mutex read_lock;
atomic_t recursion;
+
+ /* HMM mirror */
+ struct hmm_mirror mirror;
};
/**
@@ -103,7 +105,7 @@ struct amdgpu_mn_node {
};
/**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
*
* @work: previously sheduled work item
*
@@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
}
up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
- mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+ hmm_mirror_unregister(&amn->mirror);
kfree(amn);
}
/**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
*
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
*
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
*/
-static void amdgpu_mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
INIT_WORK(&amn->work, amdgpu_mn_destroy);
schedule_work(&amn->work);
}
-
/**
* amdgpu_mn_lock - take the write side lock for this notifier
*
@@ -237,141 +237,126 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
/**
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
- * @mn: our notifier
- * @range: mmu notifier context
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
- unsigned long end;
/* notification is exclusive, but interval is inclusive */
- end = range->end - 1;
+ end -= 1;
/* TODO we should be able to split locking for interval tree and
* amdgpu_mn_invalidate_node
*/
- if (amdgpu_mn_read_lock(amn, range->blockable))
+ if (amdgpu_mn_read_lock(amn, blockable))
return -EAGAIN;
- it = interval_tree_iter_first(&amn->objects, range->start, end);
+ it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
- if (!range->blockable) {
+ if (!blockable) {
amdgpu_mn_read_unlock(amn);
return -EAGAIN;
}
node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, range->start, end);
+ it = interval_tree_iter_next(it, start, end);
- amdgpu_mn_invalidate_node(node, range->start, end);
+ amdgpu_mn_invalidate_node(node, start, end);
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
/**
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
*
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) is about to update
+ * @update: the update start, end address
*
* We temporarily evict all BOs between start and end. This
* necessitates evicting all user-mode queues of the process. The BOs
* are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
- unsigned long end;
/* notification is exclusive, but interval is inclusive */
- end = range->end - 1;
+ end -= 1;
- if (amdgpu_mn_read_lock(amn, range->blockable))
+ if (amdgpu_mn_read_lock(amn, blockable))
return -EAGAIN;
- it = interval_tree_iter_first(&amn->objects, range->start, end);
+ it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
- if (!range->blockable) {
+ if (!blockable) {
amdgpu_mn_read_unlock(amn);
return -EAGAIN;
}
node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, range->start, end);
+ it = interval_tree_iter_next(it, start, end);
list_for_each_entry(bo, &node->bos, mn_list) {
struct kgd_mem *mem = bo->kfd_bo;
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
- range->start,
- end))
- amdgpu_amdkfd_evict_userptr(mem, range->mm);
+ start, end))
+ amdgpu_amdkfd_evict_userptr(mem, amn->mm);
}
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
*/
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
- amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+ .release = amdgpu_hmm_mirror_release
},
[AMDGPU_MN_TYPE_HSA] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+ .release = amdgpu_hmm_mirror_release
},
};
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
/**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
*
* @adev: amdgpu device pointer
* @type: type of MMU notifier context
*
- * Creates a notifier context for current->mm.
+ * Creates a HMM mirror context for current->mm.
*/
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
@@ -401,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
amn->mm = mm;
init_rwsem(&amn->lock);
amn->type = type;
- amn->mn.ops = &amdgpu_mn_ops[type];
amn->objects = RB_ROOT_CACHED;
mutex_init(&amn->read_lock);
atomic_set(&amn->recursion, 0);
- r = __mmu_notifier_register(&amn->mn, mm);
+ amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+ r = hmm_mirror_register(&amn->mirror, mm);
if (r)
goto free_amn;
@@ -432,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -488,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
}
/**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..0a51fd00021c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ enum amdgpu_mn_type {
AMDGPU_MN_TYPE_HSA,
};
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM_MIRROR)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
[not found] ` <BY1PR12MB0502882984A59380E5F17F66B4AA0-PicGAnIBOobrCwm+z9iKNgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2018-12-13 21:15 ` Yang, Philip
0 siblings, 0 replies; 11+ messages in thread
From: Yang, Philip @ 2018-12-13 21:15 UTC (permalink / raw)
To: Zhou, David(ChunMing), amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
I thought about changing the filename to amdgpu_hmm.c/h, then changing
the data structures,
variables, and function names from amdgpu_mn_* to amdgpu_hmm_*, but that
seemed like too many changes,
so I gave up on that change.
Philip
On 2018-12-07 7:00 a.m., Zhou, David(ChunMing) wrote:
> Even you should rename amdgpu_mn.c/h to amdgpu_hmm.c/h.
>
> -David
>
>> -----Original Message-----
>> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Yang,
>> Philip
>> Sent: Friday, December 07, 2018 5:03 AM
>> To: amd-gfx@lists.freedesktop.org
>> Cc: Yang, Philip <Philip.Yang@amd.com>
>> Subject: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu
>> notifier v6
>>
>> Replace our MMU notifier with
>> hmm_mirror_ops.sync_cpu_device_pagetables
>> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a
>> dependency in DRM_AMDGPU_USERPTR Kconfig.
>>
>> It supports both KFD userptr and gfx userptr paths.
>>
>> The depdent HMM patchset from Jérôme Glisse are all merged into 4.20.0
>> kernel now.
>>
>> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
>> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
>> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++++++++++-------------
>> --
>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
>> 4 files changed, 55 insertions(+), 77 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> index 9221e5489069..960a63355705 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
>> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
>> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK config
>> DRM_AMDGPU_USERPTR
>> bool "Always enable userptr write support"
>> depends on DRM_AMDGPU
>> - select MMU_NOTIFIER
>> + select HMM_MIRROR
>> help
>> - This option selects CONFIG_MMU_NOTIFIER if it isn't already
>> - selected to enabled full userptr support.
>> + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
>> + isn't already selected to enabled full userptr support.
>>
>> config DRM_AMDGPU_GART_DEBUGFS
>> bool "Allow GART access through debugfs"
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index f76bcb9c45e4..675efc850ff4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -172,7 +172,7 @@ endif
>> amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
>> amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
>> amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
>> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
>> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>>
>> include $(FULL_AMD_PATH)/powerplay/Makefile
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> index e55508b39496..5d518d2bb9be 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>> @@ -45,7 +45,7 @@
>>
>> #include <linux/firmware.h>
>> #include <linux/module.h>
>> -#include <linux/mmu_notifier.h>
>> +#include <linux/hmm.h>
>> #include <linux/interval_tree.h>
>> #include <drm/drmP.h>
>> #include <drm/drm.h>
>> @@ -58,7 +58,6 @@
>> *
>> * @adev: amdgpu device pointer
>> * @mm: process address space
>> - * @mn: MMU notifier structure
>> * @type: type of MMU notifier
>> * @work: destruction work item
>> * @node: hash table node to find structure by adev and mn @@ -66,6 +65,7
>> @@
>> * @objects: interval tree containing amdgpu_mn_nodes
>> * @read_lock: mutex for recursive locking of @lock
>> * @recursion: depth of recursion
>> + * @mirror: HMM mirror function support
>> *
>> * Data for each amdgpu device and process address space.
>> */
>> @@ -73,7 +73,6 @@ struct amdgpu_mn {
>> /* constant after initialisation */
>> struct amdgpu_device *adev;
>> struct mm_struct *mm;
>> - struct mmu_notifier mn;
>> enum amdgpu_mn_type type;
>>
>> /* only used on destruction */
>> @@ -87,6 +86,9 @@ struct amdgpu_mn {
>> struct rb_root_cached objects;
>> struct mutex read_lock;
>> atomic_t recursion;
>> +
>> + /* HMM mirror */
>> + struct hmm_mirror mirror;
>> };
>>
>> /**
>> @@ -103,7 +105,7 @@ struct amdgpu_mn_node { };
>>
>> /**
>> - * amdgpu_mn_destroy - destroy the MMU notifier
>> + * amdgpu_mn_destroy - destroy the HMM mirror
>> *
>> * @work: previously sheduled work item
>> *
>> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct
>> *work)
>> }
>> up_write(&amn->lock);
>> mutex_unlock(&adev->mn_lock);
>> - mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
>> +
>> + hmm_mirror_unregister(&amn->mirror);
>> kfree(amn);
>> }
>>
>> /**
>> - * amdgpu_mn_release - callback to notify about mm destruction
>> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
>> *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> + * @mirror: the HMM mirror (mm) this callback is about
>> *
>> - * Shedule a work item to lazy destroy our notifier.
>> + * Shedule a work item to lazy destroy HMM mirror.
>> */
>> -static void amdgpu_mn_release(struct mmu_notifier *mn,
>> - struct mm_struct *mm)
>> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
>> {
>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
>> mn);
>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>> +mirror);
>>
>> INIT_WORK(&amn->work, amdgpu_mn_destroy);
>> schedule_work(&amn->work);
>> }
>>
>> -
>> /**
>> * amdgpu_mn_lock - take the write side lock for this notifier
>> *
>> @@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct
>> amdgpu_mn_node *node,
>> /**
>> * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm
>> change
>> *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>> *
>> * Block for operations on BOs to finish and mark pages as accessed and
>> * potentially dirty.
>> */
>> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
>> - struct mm_struct *mm,
>> - unsigned long start,
>> - unsigned long end,
>> - bool blockable)
>> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
>> + const struct hmm_update *update)
>> {
>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
>> mn);
>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>> mirror);
>> + unsigned long start = update->start;
>> + unsigned long end = update->end;
>> + bool blockable = update->blockable;
>> struct interval_tree_node *it;
>>
>> /* notification is exclusive, but interval is inclusive */ @@ -278,28
>> +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct
>> mmu_notifier *mn,
>> amdgpu_mn_invalidate_node(node, start, end);
>> }
>>
>> + amdgpu_mn_read_unlock(amn);
>> +
>> return 0;
>> }
>>
>> /**
>> * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm
>> change
>> *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> + * @mirror: the hmm_mirror (mm) is about to update
>> + * @update: the update start, end address
>> *
>> * We temporarily evict all BOs between start and end. This
>> * necessitates evicting all user-mode queues of the process. The BOs
>> * are restorted in amdgpu_mn_invalidate_range_end_hsa.
>> */
>> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier
>> *mn,
>> - struct mm_struct *mm,
>> - unsigned long start,
>> - unsigned long end,
>> - bool blockable)
>> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
>> + const struct hmm_update *update)
>> {
>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
>> mn);
>> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
>> mirror);
>> + unsigned long start = update->start;
>> + unsigned long end = update->end;
>> + bool blockable = update->blockable;
>> struct interval_tree_node *it;
>>
>> /* notification is exclusive, but interval is inclusive */ @@ -326,59
>> +324,39 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct
>> mmu_notifier *mn,
>>
>> if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
>> start, end))
>> - amdgpu_amdkfd_evict_userptr(mem, mm);
>> + amdgpu_amdkfd_evict_userptr(mem, amn-
>>> mm);
>> }
>> }
>>
>> + amdgpu_mn_read_unlock(amn);
>> +
>> return 0;
>> }
>>
>> -/**
>> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
>> - *
>> - * @mn: our notifier
>> - * @mm: the mm this callback is about
>> - * @start: start of updated range
>> - * @end: end of updated range
>> - *
>> - * Release the lock again to allow new command submissions.
>> +/* Low bits of any reasonable mm pointer will be unused due to struct
>> + * alignment. Use these bits to make a unique key from the mm pointer
>> + * and notifier type.
>> */
>> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
>> - struct mm_struct *mm,
>> - unsigned long start,
>> - unsigned long end)
>> -{
>> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
>> mn);
>> -
>> - amdgpu_mn_read_unlock(amn);
>> -}
>> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>>
>> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
>> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
>> [AMDGPU_MN_TYPE_GFX] = {
>> - .release = amdgpu_mn_release,
>> - .invalidate_range_start =
>> amdgpu_mn_invalidate_range_start_gfx,
>> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> + .sync_cpu_device_pagetables =
>> amdgpu_mn_sync_pagetables_gfx,
>> + .release = amdgpu_hmm_mirror_release
>> },
>> [AMDGPU_MN_TYPE_HSA] = {
>> - .release = amdgpu_mn_release,
>> - .invalidate_range_start =
>> amdgpu_mn_invalidate_range_start_hsa,
>> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
>> + .sync_cpu_device_pagetables =
>> amdgpu_mn_sync_pagetables_hsa,
>> + .release = amdgpu_hmm_mirror_release
>> },
>> };
>>
>> -/* Low bits of any reasonable mm pointer will be unused due to struct
>> - * alignment. Use these bits to make a unique key from the mm pointer
>> - * and notifier type.
>> - */
>> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>> -
>> /**
>> - * amdgpu_mn_get - create notifier context
>> + * amdgpu_mn_get - create HMM mirror context
>> *
>> * @adev: amdgpu device pointer
>> * @type: type of MMU notifier context
>> *
>> - * Creates a notifier context for current->mm.
>> + * Creates a HMM mirror context for current->mm.
>> */
>> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>> enum amdgpu_mn_type type)
>> @@ -408,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>> amdgpu_device *adev,
>> amn->mm = mm;
>> init_rwsem(&amn->lock);
>> amn->type = type;
>> - amn->mn.ops = &amdgpu_mn_ops[type];
>> amn->objects = RB_ROOT_CACHED;
>> mutex_init(&amn->read_lock);
>> atomic_set(&amn->recursion, 0);
>>
>> - r = __mmu_notifier_register(&amn->mn, mm);
>> + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
>> + r = hmm_mirror_register(&amn->mirror, mm);
>> if (r)
>> goto free_amn;
>>
>> @@ -439,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct
>> amdgpu_device *adev,
>> * @bo: amdgpu buffer object
>> * @addr: userptr addr we should monitor
>> *
>> - * Registers an MMU notifier for the given BO at the specified address.
>> + * Registers an HMM mirror for the given BO at the specified address.
>> * Returns 0 on success, -ERRNO if anything goes wrong.
>> */
>> int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) @@ -
>> 495,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo,
>> unsigned long addr) }
>>
>> /**
>> - * amdgpu_mn_unregister - unregister a BO for notifier updates
>> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
>> *
>> * @bo: amdgpu buffer object
>> *
>> - * Remove any registration of MMU notifier updates from the buffer object.
>> + * Remove any registration of HMM mirror updates from the buffer object.
>> */
>> void amdgpu_mn_unregister(struct amdgpu_bo *bo) { diff --git
>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> index eb0f432f78fe..0a51fd00021c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
>> AMDGPU_MN_TYPE_HSA,
>> };
>>
>> -#if defined(CONFIG_MMU_NOTIFIER)
>> +#if defined(CONFIG_HMM_MIRROR)
>> void amdgpu_mn_lock(struct amdgpu_mn *mn); void
>> amdgpu_mn_unlock(struct amdgpu_mn *mn); struct amdgpu_mn
>> *amdgpu_mn_get(struct amdgpu_device *adev,
>> --
>> 2.17.1
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* RE: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
[not found] ` <20181206210235.12036-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
@ 2018-12-07 12:00 ` Zhou, David(ChunMing)
[not found] ` <BY1PR12MB0502882984A59380E5F17F66B4AA0-PicGAnIBOobrCwm+z9iKNgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
0 siblings, 1 reply; 11+ messages in thread
From: Zhou, David(ChunMing) @ 2018-12-07 12:00 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip
You should even rename amdgpu_mn.c/h to amdgpu_hmm.c/h.
-David
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Yang,
> Philip
> Sent: Friday, December 07, 2018 5:03 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Yang, Philip <Philip.Yang@amd.com>
> Subject: [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu
> notifier v6
>
> Replace our MMU notifier with
> hmm_mirror_ops.sync_cpu_device_pagetables
> callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a
> dependency in DRM_AMDGPU_USERPTR Kconfig.
>
> It supports both KFD userptr and gfx userptr paths.
>
> The depdent HMM patchset from Jérôme Glisse are all merged into 4.20.0
> kernel now.
>
> Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++++++++++-------------
> --
> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
> 4 files changed, 55 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig
> b/drivers/gpu/drm/amd/amdgpu/Kconfig
> index 9221e5489069..960a63355705 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Kconfig
> +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
> @@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK config
> DRM_AMDGPU_USERPTR
> bool "Always enable userptr write support"
> depends on DRM_AMDGPU
> - select MMU_NOTIFIER
> + select HMM_MIRROR
> help
> - This option selects CONFIG_MMU_NOTIFIER if it isn't already
> - selected to enabled full userptr support.
> + This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
> + isn't already selected to enabled full userptr support.
>
> config DRM_AMDGPU_GART_DEBUGFS
> bool "Allow GART access through debugfs"
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index f76bcb9c45e4..675efc850ff4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -172,7 +172,7 @@ endif
> amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
> amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
> amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
> -amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
> +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
>
> include $(FULL_AMD_PATH)/powerplay/Makefile
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> index e55508b39496..5d518d2bb9be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
> @@ -45,7 +45,7 @@
>
> #include <linux/firmware.h>
> #include <linux/module.h>
> -#include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
> #include <linux/interval_tree.h>
> #include <drm/drmP.h>
> #include <drm/drm.h>
> @@ -58,7 +58,6 @@
> *
> * @adev: amdgpu device pointer
> * @mm: process address space
> - * @mn: MMU notifier structure
> * @type: type of MMU notifier
> * @work: destruction work item
> * @node: hash table node to find structure by adev and mn @@ -66,6 +65,7
> @@
> * @objects: interval tree containing amdgpu_mn_nodes
> * @read_lock: mutex for recursive locking of @lock
> * @recursion: depth of recursion
> + * @mirror: HMM mirror function support
> *
> * Data for each amdgpu device and process address space.
> */
> @@ -73,7 +73,6 @@ struct amdgpu_mn {
> /* constant after initialisation */
> struct amdgpu_device *adev;
> struct mm_struct *mm;
> - struct mmu_notifier mn;
> enum amdgpu_mn_type type;
>
> /* only used on destruction */
> @@ -87,6 +86,9 @@ struct amdgpu_mn {
> struct rb_root_cached objects;
> struct mutex read_lock;
> atomic_t recursion;
> +
> + /* HMM mirror */
> + struct hmm_mirror mirror;
> };
>
> /**
> @@ -103,7 +105,7 @@ struct amdgpu_mn_node { };
>
> /**
> - * amdgpu_mn_destroy - destroy the MMU notifier
> + * amdgpu_mn_destroy - destroy the HMM mirror
> *
> * @work: previously sheduled work item
> *
> @@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct
> *work)
> }
> up_write(&amn->lock);
> mutex_unlock(&adev->mn_lock);
> - mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
> +
> + hmm_mirror_unregister(&amn->mirror);
> kfree(amn);
> }
>
> /**
> - * amdgpu_mn_release - callback to notify about mm destruction
> + * amdgpu_hmm_mirror_release - callback to notify about mm destruction
> *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> + * @mirror: the HMM mirror (mm) this callback is about
> *
> - * Shedule a work item to lazy destroy our notifier.
> + * Shedule a work item to lazy destroy HMM mirror.
> */
> -static void amdgpu_mn_release(struct mmu_notifier *mn,
> - struct mm_struct *mm)
> +static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
> {
> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
> mn);
> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
> +mirror);
>
> INIT_WORK(&amn->work, amdgpu_mn_destroy);
> schedule_work(&amn->work);
> }
>
> -
> /**
> * amdgpu_mn_lock - take the write side lock for this notifier
> *
> @@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct
> amdgpu_mn_node *node,
> /**
> * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm
> change
> *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
> *
> * Block for operations on BOs to finish and mark pages as accessed and
> * potentially dirty.
> */
> -static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
> - struct mm_struct *mm,
> - unsigned long start,
> - unsigned long end,
> - bool blockable)
> +static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
> + const struct hmm_update *update)
> {
> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
> mn);
> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
> mirror);
> + unsigned long start = update->start;
> + unsigned long end = update->end;
> + bool blockable = update->blockable;
> struct interval_tree_node *it;
>
> /* notification is exclusive, but interval is inclusive */ @@ -278,28
> +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct
> mmu_notifier *mn,
> amdgpu_mn_invalidate_node(node, start, end);
> }
>
> + amdgpu_mn_read_unlock(amn);
> +
> return 0;
> }
>
> /**
> * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm
> change
> *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> + * @mirror: the hmm_mirror (mm) is about to update
> + * @update: the update start, end address
> *
> * We temporarily evict all BOs between start and end. This
> * necessitates evicting all user-mode queues of the process. The BOs
> * are restorted in amdgpu_mn_invalidate_range_end_hsa.
> */
> -static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier
> *mn,
> - struct mm_struct *mm,
> - unsigned long start,
> - unsigned long end,
> - bool blockable)
> +static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
> + const struct hmm_update *update)
> {
> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
> mn);
> + struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn,
> mirror);
> + unsigned long start = update->start;
> + unsigned long end = update->end;
> + bool blockable = update->blockable;
> struct interval_tree_node *it;
>
> /* notification is exclusive, but interval is inclusive */ @@ -326,59
> +324,39 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct
> mmu_notifier *mn,
>
> if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
> start, end))
> - amdgpu_amdkfd_evict_userptr(mem, mm);
> + amdgpu_amdkfd_evict_userptr(mem, amn-
> >mm);
> }
> }
>
> + amdgpu_mn_read_unlock(amn);
> +
> return 0;
> }
>
> -/**
> - * amdgpu_mn_invalidate_range_end - callback to notify about mm change
> - *
> - * @mn: our notifier
> - * @mm: the mm this callback is about
> - * @start: start of updated range
> - * @end: end of updated range
> - *
> - * Release the lock again to allow new command submissions.
> +/* Low bits of any reasonable mm pointer will be unused due to struct
> + * alignment. Use these bits to make a unique key from the mm pointer
> + * and notifier type.
> */
> -static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
> - struct mm_struct *mm,
> - unsigned long start,
> - unsigned long end)
> -{
> - struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn,
> mn);
> -
> - amdgpu_mn_read_unlock(amn);
> -}
> +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
>
> -static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
> +static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
> [AMDGPU_MN_TYPE_GFX] = {
> - .release = amdgpu_mn_release,
> - .invalidate_range_start =
> amdgpu_mn_invalidate_range_start_gfx,
> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
> + .sync_cpu_device_pagetables =
> amdgpu_mn_sync_pagetables_gfx,
> + .release = amdgpu_hmm_mirror_release
> },
> [AMDGPU_MN_TYPE_HSA] = {
> - .release = amdgpu_mn_release,
> - .invalidate_range_start =
> amdgpu_mn_invalidate_range_start_hsa,
> - .invalidate_range_end = amdgpu_mn_invalidate_range_end,
> + .sync_cpu_device_pagetables =
> amdgpu_mn_sync_pagetables_hsa,
> + .release = amdgpu_hmm_mirror_release
> },
> };
>
> -/* Low bits of any reasonable mm pointer will be unused due to struct
> - * alignment. Use these bits to make a unique key from the mm pointer
> - * and notifier type.
> - */
> -#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
> -
> /**
> - * amdgpu_mn_get - create notifier context
> + * amdgpu_mn_get - create HMM mirror context
> *
> * @adev: amdgpu device pointer
> * @type: type of MMU notifier context
> *
> - * Creates a notifier context for current->mm.
> + * Creates a HMM mirror context for current->mm.
> */
> struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
> enum amdgpu_mn_type type)
> @@ -408,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct
> amdgpu_device *adev,
> amn->mm = mm;
> init_rwsem(&amn->lock);
> amn->type = type;
> - amn->mn.ops = &amdgpu_mn_ops[type];
> amn->objects = RB_ROOT_CACHED;
> mutex_init(&amn->read_lock);
> atomic_set(&amn->recursion, 0);
>
> - r = __mmu_notifier_register(&amn->mn, mm);
> + amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
> + r = hmm_mirror_register(&amn->mirror, mm);
> if (r)
> goto free_amn;
>
> @@ -439,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct
> amdgpu_device *adev,
> * @bo: amdgpu buffer object
> * @addr: userptr addr we should monitor
> *
> - * Registers an MMU notifier for the given BO at the specified address.
> + * Registers an HMM mirror for the given BO at the specified address.
> * Returns 0 on success, -ERRNO if anything goes wrong.
> */
> int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) @@ -
> 495,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo,
> unsigned long addr) }
>
> /**
> - * amdgpu_mn_unregister - unregister a BO for notifier updates
> + * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
> *
> * @bo: amdgpu buffer object
> *
> - * Remove any registration of MMU notifier updates from the buffer object.
> + * Remove any registration of HMM mirror updates from the buffer object.
> */
> void amdgpu_mn_unregister(struct amdgpu_bo *bo) { diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> index eb0f432f78fe..0a51fd00021c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
> @@ -34,7 +34,7 @@ enum amdgpu_mn_type {
> AMDGPU_MN_TYPE_HSA,
> };
>
> -#if defined(CONFIG_MMU_NOTIFIER)
> +#if defined(CONFIG_HMM_MIRROR)
> void amdgpu_mn_lock(struct amdgpu_mn *mn); void
> amdgpu_mn_unlock(struct amdgpu_mn *mn); struct amdgpu_mn
> *amdgpu_mn_get(struct amdgpu_device *adev,
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6
@ 2018-12-06 21:02 Yang, Philip
[not found] ` <20181206210235.12036-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 11+ messages in thread
From: Yang, Philip @ 2018-12-06 21:02 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Yang, Philip
Replace our MMU notifier with hmm_mirror_ops.sync_cpu_device_pagetables
callback. Enable CONFIG_HMM and CONFIG_HMM_MIRROR as a dependency in
DRM_AMDGPU_USERPTR Kconfig.
It supports both KFD userptr and gfx userptr paths.
The dependent HMM patchset from Jérôme Glisse is now fully merged into
the 4.20.0 kernel.
Change-Id: Ie62c3c5e3c5b8521ab3b438d1eff2aa2a003835e
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/Kconfig | 6 +-
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 122 ++++++++++---------------
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 2 +-
4 files changed, 55 insertions(+), 77 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 9221e5489069..960a63355705 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -26,10 +26,10 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
- select MMU_NOTIFIER
+ select HMM_MIRROR
help
- This option selects CONFIG_MMU_NOTIFIER if it isn't already
- selected to enabled full userptr support.
+ This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if it
+ isn't already selected to enabled full userptr support.
config DRM_AMDGPU_GART_DEBUGFS
bool "Allow GART access through debugfs"
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index f76bcb9c45e4..675efc850ff4 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -172,7 +172,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
include $(FULL_AMD_PATH)/powerplay/Makefile
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e55508b39496..5d518d2bb9be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -45,7 +45,7 @@
#include <linux/firmware.h>
#include <linux/module.h>
-#include <linux/mmu_notifier.h>
+#include <linux/hmm.h>
#include <linux/interval_tree.h>
#include <drm/drmP.h>
#include <drm/drm.h>
@@ -58,7 +58,6 @@
*
* @adev: amdgpu device pointer
* @mm: process address space
- * @mn: MMU notifier structure
* @type: type of MMU notifier
* @work: destruction work item
* @node: hash table node to find structure by adev and mn
@@ -66,6 +65,7 @@
* @objects: interval tree containing amdgpu_mn_nodes
* @read_lock: mutex for recursive locking of @lock
* @recursion: depth of recursion
+ * @mirror: HMM mirror function support
*
* Data for each amdgpu device and process address space.
*/
@@ -73,7 +73,6 @@ struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
- struct mmu_notifier mn;
enum amdgpu_mn_type type;
/* only used on destruction */
@@ -87,6 +86,9 @@ struct amdgpu_mn {
struct rb_root_cached objects;
struct mutex read_lock;
atomic_t recursion;
+
+ /* HMM mirror */
+ struct hmm_mirror mirror;
};
/**
@@ -103,7 +105,7 @@ struct amdgpu_mn_node {
};
/**
- * amdgpu_mn_destroy - destroy the MMU notifier
+ * amdgpu_mn_destroy - destroy the HMM mirror
*
* @work: previously sheduled work item
*
@@ -129,28 +131,26 @@ static void amdgpu_mn_destroy(struct work_struct *work)
}
up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
- mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
+
+ hmm_mirror_unregister(&amn->mirror);
kfree(amn);
}
/**
- * amdgpu_mn_release - callback to notify about mm destruction
+ * amdgpu_hmm_mirror_release - callback to notify about mm destruction
*
- * @mn: our notifier
- * @mm: the mm this callback is about
+ * @mirror: the HMM mirror (mm) this callback is about
*
- * Shedule a work item to lazy destroy our notifier.
+ * Shedule a work item to lazy destroy HMM mirror.
*/
-static void amdgpu_mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
+static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
INIT_WORK(&amn->work, amdgpu_mn_destroy);
schedule_work(&amn->work);
}
-
/**
* amdgpu_mn_lock - take the write side lock for this notifier
*
@@ -237,21 +237,19 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
/**
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) that is about to update
+ * @update: the update's start and end address
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- bool blockable)
+static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
@@ -278,28 +276,28 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
amdgpu_mn_invalidate_node(node, start, end);
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
/**
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
*
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @mirror: the hmm_mirror (mm) that is about to update
+ * @update: the update's start and end address
*
* We temporarily evict all BOs between start and end. This
* necessitates evicting all user-mode queues of the process. The BOs
* are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/
-static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- bool blockable)
+static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+ const struct hmm_update *update)
{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
+ struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
+ unsigned long start = update->start;
+ unsigned long end = update->end;
+ bool blockable = update->blockable;
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
@@ -326,59 +324,39 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
start, end))
- amdgpu_amdkfd_evict_userptr(mem, mm);
+ amdgpu_amdkfd_evict_userptr(mem, amn->mm);
}
}
+ amdgpu_mn_read_unlock(amn);
+
return 0;
}
-/**
- * amdgpu_mn_invalidate_range_end - callback to notify about mm change
- *
- * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
- *
- * Release the lock again to allow new command submissions.
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
*/
-static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
-{
- struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
-
- amdgpu_mn_read_unlock(amn);
-}
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
[AMDGPU_MN_TYPE_GFX] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
+ .release = amdgpu_hmm_mirror_release
},
[AMDGPU_MN_TYPE_HSA] = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
+ .release = amdgpu_hmm_mirror_release
},
};
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
/**
- * amdgpu_mn_get - create notifier context
+ * amdgpu_mn_get - create HMM mirror context
*
* @adev: amdgpu device pointer
* @type: type of MMU notifier context
*
- * Creates a notifier context for current->mm.
+ * Creates an HMM mirror context for current->mm.
*/
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
@@ -408,12 +386,12 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
amn->mm = mm;
init_rwsem(&amn->lock);
amn->type = type;
- amn->mn.ops = &amdgpu_mn_ops[type];
amn->objects = RB_ROOT_CACHED;
mutex_init(&amn->read_lock);
atomic_set(&amn->recursion, 0);
- r = __mmu_notifier_register(&amn->mn, mm);
+ amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
+ r = hmm_mirror_register(&amn->mirror, mm);
if (r)
goto free_amn;
@@ -439,7 +417,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an MMU notifier for the given BO at the specified address.
+ * Registers an HMM mirror for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
@@ -495,11 +473,11 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
}
/**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of MMU notifier updates from the buffer object.
+ * Remove any registration of HMM mirror updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index eb0f432f78fe..0a51fd00021c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -34,7 +34,7 @@ enum amdgpu_mn_type {
AMDGPU_MN_TYPE_HSA,
};
-#if defined(CONFIG_MMU_NOTIFIER)
+#if defined(CONFIG_HMM_MIRROR)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 11+ messages in thread
end of thread, other threads:[~2019-02-04 18:09 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-10 17:02 [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6 Yang, Philip
[not found] ` <20190110170228.10917-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-01-10 17:02 ` [PATCH 2/3] drm/amdkfd: avoid HMM change cause circular lock dependency v2 Yang, Philip
2019-01-10 17:02 ` [PATCH 3/3] drm/amdgpu: replace get_user_pages with HMM address mirror helpers v6 Yang, Philip
[not found] ` <20190110170228.10917-3-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-01-14 15:33 ` Yang, Philip
-- strict thread matches above, loose matches on Subject: below --
2019-02-04 15:06 [PATCH 0/3] Use HMM to replace get_user_pages Yang, Philip
[not found] ` <20190204150613.5837-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-04 15:06 ` [PATCH 1/3] drm/amdgpu: use HMM mirror callback to replace mmu notifier v6 Yang, Philip
[not found] ` <20190204150613.5837-2-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2019-02-04 15:18 ` Christian König
[not found] ` <71e6094c-a257-7aa3-854c-9a9dff163b01-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-02-04 17:17 ` Yang, Philip
[not found] ` <ee46d8e7-cf64-eb8c-cfd6-793db28c6c69-5C7GfCeVMHo@public.gmane.org>
2019-02-04 18:09 ` Christian König
2018-12-06 21:02 Yang, Philip
[not found] ` <20181206210235.12036-1-Philip.Yang-5C7GfCeVMHo@public.gmane.org>
2018-12-07 12:00 ` Zhou, David(ChunMing)
[not found] ` <BY1PR12MB0502882984A59380E5F17F66B4AA0-PicGAnIBOobrCwm+z9iKNgdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2018-12-13 21:15 ` Yang, Philip
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.