* [PATCH v2 1/7] drm/amdgpu: Rename vm invalidate lock to status_lock
2022-09-19 17:15 [PATCH v2 0/7] Fix amdgpu_vm_pt_free warning Philip Yang
@ 2022-09-19 17:15 ` Philip Yang
2022-09-20 15:21 ` Felix Kuehling
2022-09-19 17:15 ` [PATCH v2 2/7] drm/amdgpu: Use vm status_lock to protect relocated list Philip Yang
` (5 subsequent siblings)
6 siblings, 1 reply; 11+ messages in thread
From: Philip Yang @ 2022-09-19 17:15 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, felix.kuehling, christian.koenig
The vm status_lock will be used to protect all vm status lists.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Christian König <christian.koenig@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 30 +++++++++++++-------------
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++-
2 files changed, 18 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 690fd4f639f1..596f1ea8babc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -225,9 +225,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->invalidated_lock);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
- spin_unlock(&vm_bo->vm->invalidated_lock);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -256,9 +256,9 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
- spin_lock(&vm_bo->vm->invalidated_lock);
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->done);
- spin_unlock(&vm_bo->vm->invalidated_lock);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -936,7 +936,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
gtt_mem, cpu_mem);
}
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
@@ -949,7 +949,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
gtt_mem, cpu_mem);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
}
/**
* amdgpu_vm_bo_update - update all BO mappings in the vm page table
@@ -1290,12 +1290,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
return r;
}
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
base.vm_status);
resv = bo_va->base.bo->tbo.base.resv;
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
/* Try to reserve the BO to avoid clearing its ptes */
if (!amdgpu_vm_debug && dma_resv_trylock(resv))
@@ -1310,9 +1310,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
if (!clear)
dma_resv_unlock(resv);
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
return 0;
}
@@ -1763,9 +1763,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
}
}
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
list_del(&bo_va->base.vm_status);
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
@@ -2019,7 +2019,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
INIT_LIST_HEAD(&vm->moved);
INIT_LIST_HEAD(&vm->idle);
INIT_LIST_HEAD(&vm->invalidated);
- spin_lock_init(&vm->invalidated_lock);
+ spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->freed);
INIT_LIST_HEAD(&vm->done);
@@ -2584,7 +2584,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
id = 0;
seq_puts(m, "\tInvalidated BOs:\n");
- spin_lock(&vm->invalidated_lock);
+ spin_lock(&vm->status_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
@@ -2599,7 +2599,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
continue;
total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
}
- spin_unlock(&vm->invalidated_lock);
+ spin_unlock(&vm->status_lock);
total_done_objs = id;
seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9ecb7f663e19..e6dd112d865c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -254,6 +254,9 @@ struct amdgpu_vm {
bool evicting;
unsigned int saved_flags;
+ /* Lock to protect vm_bo add/del/move on all lists of vm */
+ spinlock_t status_lock;
+
/* BOs who needs a validation */
struct list_head evicted;
@@ -268,7 +271,6 @@ struct amdgpu_vm {
/* regular invalidated BOs, but not yet updated in the PT */
struct list_head invalidated;
- spinlock_t invalidated_lock;
/* BO mappings freed, but not yet updated in the PT */
struct list_head freed;
--
2.35.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2 1/7] drm/amdgpu: Rename vm invalidate lock to status_lock
2022-09-19 17:15 ` [PATCH v2 1/7] drm/amdgpu: Rename vm invalidate lock to status_lock Philip Yang
@ 2022-09-20 15:21 ` Felix Kuehling
0 siblings, 0 replies; 11+ messages in thread
From: Felix Kuehling @ 2022-09-20 15:21 UTC (permalink / raw)
To: Philip Yang, amd-gfx; +Cc: christian.koenig
Am 2022-09-19 um 13:15 schrieb Philip Yang:
> The vm status_lock will be used to protect all vm status lists.
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
> Christian König <christian.koenig@amd.com>
Was this meant to say "Reviewed-by: Christian ..."?
Patches 2-6 need proper patch descriptions. Something like: "Use
vm_status_lock to protect all vm_status state transitions to allow them
to happen without a reservation lock in unlocked page table updates."
Other than that, patches 1-6 are
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 30 +++++++++++++-------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +++-
> 2 files changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 690fd4f639f1..596f1ea8babc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -225,9 +225,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
> */
> static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
> {
> - spin_lock(&vm_bo->vm->invalidated_lock);
> + spin_lock(&vm_bo->vm->status_lock);
> list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
> - spin_unlock(&vm_bo->vm->invalidated_lock);
> + spin_unlock(&vm_bo->vm->status_lock);
> }
>
> /**
> @@ -256,9 +256,9 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
> */
> static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
> {
> - spin_lock(&vm_bo->vm->invalidated_lock);
> + spin_lock(&vm_bo->vm->status_lock);
> list_move(&vm_bo->vm_status, &vm_bo->vm->done);
> - spin_unlock(&vm_bo->vm->invalidated_lock);
> + spin_unlock(&vm_bo->vm->status_lock);
> }
>
> /**
> @@ -936,7 +936,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
> amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
> gtt_mem, cpu_mem);
> }
> - spin_lock(&vm->invalidated_lock);
> + spin_lock(&vm->status_lock);
> list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
> if (!bo_va->base.bo)
> continue;
> @@ -949,7 +949,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
> amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
> gtt_mem, cpu_mem);
> }
> - spin_unlock(&vm->invalidated_lock);
> + spin_unlock(&vm->status_lock);
> }
> /**
> * amdgpu_vm_bo_update - update all BO mappings in the vm page table
> @@ -1290,12 +1290,12 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
> return r;
> }
>
> - spin_lock(&vm->invalidated_lock);
> + spin_lock(&vm->status_lock);
> while (!list_empty(&vm->invalidated)) {
> bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
> base.vm_status);
> resv = bo_va->base.bo->tbo.base.resv;
> - spin_unlock(&vm->invalidated_lock);
> + spin_unlock(&vm->status_lock);
>
> /* Try to reserve the BO to avoid clearing its ptes */
> if (!amdgpu_vm_debug && dma_resv_trylock(resv))
> @@ -1310,9 +1310,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>
> if (!clear)
> dma_resv_unlock(resv);
> - spin_lock(&vm->invalidated_lock);
> + spin_lock(&vm->status_lock);
> }
> - spin_unlock(&vm->invalidated_lock);
> + spin_unlock(&vm->status_lock);
>
> return 0;
> }
> @@ -1763,9 +1763,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
> }
> }
>
> - spin_lock(&vm->invalidated_lock);
> + spin_lock(&vm->status_lock);
> list_del(&bo_va->base.vm_status);
> - spin_unlock(&vm->invalidated_lock);
> + spin_unlock(&vm->status_lock);
>
> list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
> list_del(&mapping->list);
> @@ -2019,7 +2019,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> INIT_LIST_HEAD(&vm->moved);
> INIT_LIST_HEAD(&vm->idle);
> INIT_LIST_HEAD(&vm->invalidated);
> - spin_lock_init(&vm->invalidated_lock);
> + spin_lock_init(&vm->status_lock);
> INIT_LIST_HEAD(&vm->freed);
> INIT_LIST_HEAD(&vm->done);
>
> @@ -2584,7 +2584,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
> id = 0;
>
> seq_puts(m, "\tInvalidated BOs:\n");
> - spin_lock(&vm->invalidated_lock);
> + spin_lock(&vm->status_lock);
> list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
> if (!bo_va->base.bo)
> continue;
> @@ -2599,7 +2599,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
> continue;
> total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
> }
> - spin_unlock(&vm->invalidated_lock);
> + spin_unlock(&vm->status_lock);
> total_done_objs = id;
>
> seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 9ecb7f663e19..e6dd112d865c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -254,6 +254,9 @@ struct amdgpu_vm {
> bool evicting;
> unsigned int saved_flags;
>
> + /* Lock to protect vm_bo add/del/move on all lists of vm */
> + spinlock_t status_lock;
> +
> /* BOs who needs a validation */
> struct list_head evicted;
>
> @@ -268,7 +271,6 @@ struct amdgpu_vm {
>
> /* regular invalidated BOs, but not yet updated in the PT */
> struct list_head invalidated;
> - spinlock_t invalidated_lock;
>
> /* BO mappings freed, but not yet updated in the PT */
> struct list_head freed;
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v2 2/7] drm/amdgpu: Use vm status_lock to protect relocated list
2022-09-19 17:15 [PATCH v2 0/7] Fix amdgpu_vm_pt_free warning Philip Yang
2022-09-19 17:15 ` [PATCH v2 1/7] drm/amdgpu: Rename vm invalidate lock to status_lock Philip Yang
@ 2022-09-19 17:15 ` Philip Yang
2022-09-19 17:15 ` [PATCH v2 3/7] drm/amdgpu: Use vm status_lock to protect vm idle list Philip Yang
` (4 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Philip Yang @ 2022-09-19 17:15 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, felix.kuehling, christian.koenig
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 596f1ea8babc..4a1cb20deb2d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -240,10 +240,13 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
{
- if (vm_bo->bo->parent)
+ if (vm_bo->bo->parent) {
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
- else
+ spin_unlock(&vm_bo->vm->status_lock);
+ } else {
amdgpu_vm_bo_idle(vm_bo);
+ }
}
/**
@@ -680,9 +683,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm_update_params params;
struct amdgpu_vm_bo_base *entry;
bool flush_tlb_needed = false;
+ LIST_HEAD(relocated);
int r, idx;
- if (list_empty(&vm->relocated))
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->relocated, &relocated);
+ spin_unlock(&vm->status_lock);
+
+ if (list_empty(&relocated))
return 0;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
@@ -697,7 +705,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
if (r)
goto error;
- list_for_each_entry(entry, &vm->relocated, vm_status) {
+ list_for_each_entry(entry, &relocated, vm_status) {
/* vm_flush_needed after updating moved PDEs */
flush_tlb_needed |= entry->moved;
@@ -713,9 +721,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
if (flush_tlb_needed)
atomic64_inc(&vm->tlb_seq);
- while (!list_empty(&vm->relocated)) {
- entry = list_first_entry(&vm->relocated,
- struct amdgpu_vm_bo_base,
+ while (!list_empty(&relocated)) {
+ entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
vm_status);
amdgpu_vm_bo_idle(entry);
}
@@ -912,6 +919,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
{
struct amdgpu_bo_va *bo_va, *tmp;
+ spin_lock(&vm->status_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
if (!bo_va->base.bo)
continue;
@@ -936,7 +944,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
amdgpu_bo_get_memory(bo_va->base.bo, vram_mem,
gtt_mem, cpu_mem);
}
- spin_lock(&vm->status_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
--
2.35.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 3/7] drm/amdgpu: Use vm status_lock to protect vm idle list
2022-09-19 17:15 [PATCH v2 0/7] Fix amdgpu_vm_pt_free warning Philip Yang
2022-09-19 17:15 ` [PATCH v2 1/7] drm/amdgpu: Rename vm invalidate lock to status_lock Philip Yang
2022-09-19 17:15 ` [PATCH v2 2/7] drm/amdgpu: Use vm status_lock to protect relocated list Philip Yang
@ 2022-09-19 17:15 ` Philip Yang
2022-09-19 17:15 ` [PATCH v2 4/7] drm/amdgpu: Use vm status_lock to protect vm moved list Philip Yang
` (3 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Philip Yang @ 2022-09-19 17:15 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, felix.kuehling, christian.koenig
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4a1cb20deb2d..c3412709e626 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -211,7 +211,9 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
{
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+ spin_unlock(&vm_bo->vm->status_lock);
vm_bo->moved = false;
}
@@ -2554,6 +2556,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
unsigned int total_done_objs = 0;
unsigned int id = 0;
+ spin_lock(&vm->status_lock);
seq_puts(m, "\tIdle BOs:\n");
list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
if (!bo_va->base.bo)
@@ -2591,7 +2594,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
id = 0;
seq_puts(m, "\tInvalidated BOs:\n");
- spin_lock(&vm->status_lock);
list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
if (!bo_va->base.bo)
continue;
--
2.35.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 4/7] drm/amdgpu: Use vm status_lock to protect vm moved list
2022-09-19 17:15 [PATCH v2 0/7] Fix amdgpu_vm_pt_free warning Philip Yang
` (2 preceding siblings ...)
2022-09-19 17:15 ` [PATCH v2 3/7] drm/amdgpu: Use vm status_lock to protect vm idle list Philip Yang
@ 2022-09-19 17:15 ` Philip Yang
2022-09-19 17:15 ` [PATCH v2 5/7] drm/amdgpu: Use vm status_lock to protect vm evicted list Philip Yang
` (2 subsequent siblings)
6 siblings, 0 replies; 11+ messages in thread
From: Philip Yang @ 2022-09-19 17:15 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, felix.kuehling, christian.koenig
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c3412709e626..168875115538 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -198,7 +198,9 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
*/
static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
{
+ spin_lock(&vm_bo->vm->status_lock);
list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
@@ -1287,19 +1289,24 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
- struct amdgpu_bo_va *bo_va, *tmp;
+ struct amdgpu_bo_va *bo_va;
struct dma_resv *resv;
bool clear;
int r;
- list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->moved)) {
+ bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+ base.vm_status);
+ spin_unlock(&vm->status_lock);
+
/* Per VM BOs never need to bo cleared in the page tables */
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
+ spin_lock(&vm->status_lock);
}
- spin_lock(&vm->status_lock);
while (!list_empty(&vm->invalidated)) {
bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
base.vm_status);
@@ -1396,7 +1403,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
!bo_va->base.moved) {
- list_move(&bo_va->base.vm_status, &vm->moved);
+ amdgpu_vm_bo_moved(&bo_va->base);
}
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
--
2.35.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 5/7] drm/amdgpu: Use vm status_lock to protect vm evicted list
2022-09-19 17:15 [PATCH v2 0/7] Fix amdgpu_vm_pt_free warning Philip Yang
` (3 preceding siblings ...)
2022-09-19 17:15 ` [PATCH v2 4/7] drm/amdgpu: Use vm status_lock to protect vm moved list Philip Yang
@ 2022-09-19 17:15 ` Philip Yang
2022-09-19 17:15 ` [PATCH v2 6/7] drm/amdgpu: Use vm status_lock to protect pt free Philip Yang
2022-09-19 17:15 ` [PATCH v2 7/7] drm/amdgpu: Fix amdgpu_vm_pt_free warning Philip Yang
6 siblings, 0 replies; 11+ messages in thread
From: Philip Yang @ 2022-09-19 17:15 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, felix.kuehling, christian.koenig
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 27 +++++++++++++++++++++-----
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 168875115538..b2e96682b9bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -183,10 +183,12 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
struct amdgpu_bo *bo = vm_bo->bo;
vm_bo->moved = true;
+ spin_lock(&vm_bo->vm->status_lock);
if (bo->tbo.type == ttm_bo_type_kernel)
list_move(&vm_bo->vm_status, &vm->evicted);
else
list_move_tail(&vm_bo->vm_status, &vm->evicted);
+ spin_unlock(&vm_bo->vm->status_lock);
}
/**
* amdgpu_vm_bo_moved - vm_bo is moved
@@ -370,12 +372,20 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*validate)(void *p, struct amdgpu_bo *bo),
void *param)
{
- struct amdgpu_vm_bo_base *bo_base, *tmp;
+ struct amdgpu_vm_bo_base *bo_base;
+ struct amdgpu_bo *shadow;
+ struct amdgpu_bo *bo;
int r;
- list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
- struct amdgpu_bo *bo = bo_base->bo;
- struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
+ spin_lock(&vm->status_lock);
+ while (!list_empty(&vm->evicted)) {
+ bo_base = list_first_entry(&vm->evicted,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ spin_unlock(&vm->status_lock);
+
+ bo = bo_base->bo;
+ shadow = amdgpu_bo_shadowed(bo);
r = validate(param, bo);
if (r)
@@ -392,7 +402,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
amdgpu_vm_bo_relocated(bo_base);
}
+ spin_lock(&vm->status_lock);
}
+ spin_unlock(&vm->status_lock);
amdgpu_vm_eviction_lock(vm);
vm->evicting = false;
@@ -413,13 +425,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
+ bool empty;
bool ret;
amdgpu_vm_eviction_lock(vm);
ret = !vm->evicting;
amdgpu_vm_eviction_unlock(vm);
- return ret && list_empty(&vm->evicted);
+ spin_lock(&vm->status_lock);
+ empty = list_empty(&vm->evicted);
+ spin_unlock(&vm->status_lock);
+
+ return ret && empty;
}
/**
--
2.35.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 6/7] drm/amdgpu: Use vm status_lock to protect pt free
2022-09-19 17:15 [PATCH v2 0/7] Fix amdgpu_vm_pt_free warning Philip Yang
` (4 preceding siblings ...)
2022-09-19 17:15 ` [PATCH v2 5/7] drm/amdgpu: Use vm status_lock to protect vm evicted list Philip Yang
@ 2022-09-19 17:15 ` Philip Yang
2022-09-19 17:15 ` [PATCH v2 7/7] drm/amdgpu: Fix amdgpu_vm_pt_free warning Philip Yang
6 siblings, 0 replies; 11+ messages in thread
From: Philip Yang @ 2022-09-19 17:15 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, felix.kuehling, christian.koenig
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 88de9f0d4728..61a4b7182d44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -637,7 +637,10 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
}
ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
entry->bo->vm_bo = NULL;
+
+ spin_lock(&entry->vm->status_lock);
list_del(&entry->vm_status);
+ spin_unlock(&entry->vm->status_lock);
amdgpu_bo_unref(&entry->bo);
}
--
2.35.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v2 7/7] drm/amdgpu: Fix amdgpu_vm_pt_free warning
2022-09-19 17:15 [PATCH v2 0/7] Fix amdgpu_vm_pt_free warning Philip Yang
` (5 preceding siblings ...)
2022-09-19 17:15 ` [PATCH v2 6/7] drm/amdgpu: Use vm status_lock to protect pt free Philip Yang
@ 2022-09-19 17:15 ` Philip Yang
2022-09-20 15:17 ` Felix Kuehling
2022-09-20 15:43 ` Christian König
6 siblings, 2 replies; 11+ messages in thread
From: Philip Yang @ 2022-09-19 17:15 UTC (permalink / raw)
To: amd-gfx; +Cc: Philip Yang, felix.kuehling, christian.koenig
Free page table BO from vm resv unlocked context generate below
warnings.
Add a pt_free_work in vm to free page table BO from vm->pt_freed list.
pass vm resv unlock status from page table update caller, and add vm_bo
entry to vm->pt_freed list and schedule the pt_free_work if calling with
vm resv unlocked.
WARNING: CPU: 12 PID: 3238 at
drivers/gpu/drm/ttm/ttm_bo.c:106 ttm_bo_set_bulk_move+0xa1/0xc0
Call Trace:
amdgpu_vm_pt_free+0x42/0xd0 [amdgpu]
amdgpu_vm_pt_free_dfs+0xb3/0xf0 [amdgpu]
amdgpu_vm_ptes_update+0x52d/0x850 [amdgpu]
amdgpu_vm_update_range+0x2a6/0x640 [amdgpu]
svm_range_unmap_from_gpus+0x110/0x300 [amdgpu]
svm_range_cpu_invalidate_pagetables+0x535/0x600 [amdgpu]
__mmu_notifier_invalidate_range_start+0x1cd/0x230
unmap_vmas+0x9d/0x140
unmap_region+0xa8/0x110
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 41 +++++++++++++++++++++--
3 files changed, 47 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b2e96682b9bb..83b0c5d86e48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2055,6 +2055,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->freed);
INIT_LIST_HEAD(&vm->done);
+ INIT_LIST_HEAD(&vm->pt_freed);
+ INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
/* create scheduler entities for page table updates */
r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
@@ -2256,6 +2258,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+ flush_work(&vm->pt_free_work);
+
root = amdgpu_bo_ref(vm->root.bo);
amdgpu_bo_reserve(root, true);
amdgpu_vm_set_pasid(adev, vm, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index e6dd112d865c..83acb7bd80fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -278,6 +278,10 @@ struct amdgpu_vm {
/* BOs which are invalidated, has been updated in the PTs */
struct list_head done;
+ /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
+ struct list_head pt_freed;
+ struct work_struct pt_free_work;
+
/* contains the page directory */
struct amdgpu_vm_bo_base root;
struct dma_fence *last_update;
@@ -473,6 +477,7 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags);
+void amdgpu_vm_pt_free_work(struct work_struct *work);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 61a4b7182d44..358b91243e37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -644,6 +644,27 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
amdgpu_bo_unref(&entry->bo);
}
+void amdgpu_vm_pt_free_work(struct work_struct *work)
+{
+ struct amdgpu_vm_bo_base *entry, *next;
+ struct amdgpu_vm *vm;
+ LIST_HEAD(pt_freed);
+
+ vm = container_of(work, struct amdgpu_vm, pt_free_work);
+
+ spin_lock(&vm->status_lock);
+ list_splice_init(&vm->pt_freed, &pt_freed);
+ spin_unlock(&vm->status_lock);
+
+ /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
+ amdgpu_bo_reserve(vm->root.bo, true);
+
+ list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
+ amdgpu_vm_pt_free(entry);
+
+ amdgpu_bo_unreserve(vm->root.bo);
+}
+
/**
* amdgpu_vm_pt_free_dfs - free PD/PT levels
*
@@ -655,11 +676,24 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
*/
static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start)
+ struct amdgpu_vm_pt_cursor *start,
+ bool unlocked)
{
struct amdgpu_vm_pt_cursor cursor;
struct amdgpu_vm_bo_base *entry;
+ if (unlocked) {
+ spin_lock(&vm->status_lock);
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
+ list_move(&entry->vm_status, &vm->pt_freed);
+
+ if (start)
+ list_move(&start->entry->vm_status, &vm->pt_freed);
+ spin_unlock(&vm->status_lock);
+ schedule_work(&vm->pt_free_work);
+ return;
+ }
+
for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
amdgpu_vm_pt_free(entry);
@@ -676,7 +710,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
*/
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- amdgpu_vm_pt_free_dfs(adev, vm, NULL);
+ amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
}
/**
@@ -969,7 +1003,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
if (cursor.entry->bo) {
params->table_freed = true;
amdgpu_vm_pt_free_dfs(adev, params->vm,
- &cursor);
+ &cursor,
+ params->unlocked);
}
amdgpu_vm_pt_next(adev, &cursor);
}
--
2.35.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v2 7/7] drm/amdgpu: Fix amdgpu_vm_pt_free warning
2022-09-19 17:15 ` [PATCH v2 7/7] drm/amdgpu: Fix amdgpu_vm_pt_free warning Philip Yang
@ 2022-09-20 15:17 ` Felix Kuehling
2022-09-20 15:43 ` Christian König
1 sibling, 0 replies; 11+ messages in thread
From: Felix Kuehling @ 2022-09-20 15:17 UTC (permalink / raw)
To: Philip Yang, amd-gfx; +Cc: christian.koenig
Am 2022-09-19 um 13:15 schrieb Philip Yang:
> Free page table BO from vm resv unlocked context generate below
> warnings.
>
> Add a pt_free_work in vm to free page table BO from vm->pt_freed list.
> pass vm resv unlock status from page table update caller, and add vm_bo
> entry to vm->pt_freed list and schedule the pt_free_work if calling with
> vm resv unlocked.
>
> WARNING: CPU: 12 PID: 3238 at
> drivers/gpu/drm/ttm/ttm_bo.c:106 ttm_bo_set_bulk_move+0xa1/0xc0
> Call Trace:
> amdgpu_vm_pt_free+0x42/0xd0 [amdgpu]
> amdgpu_vm_pt_free_dfs+0xb3/0xf0 [amdgpu]
> amdgpu_vm_ptes_update+0x52d/0x850 [amdgpu]
> amdgpu_vm_update_range+0x2a6/0x640 [amdgpu]
> svm_range_unmap_from_gpus+0x110/0x300 [amdgpu]
> svm_range_cpu_invalidate_pagetables+0x535/0x600 [amdgpu]
> __mmu_notifier_invalidate_range_start+0x1cd/0x230
> unmap_vmas+0x9d/0x140
> unmap_region+0xa8/0x110
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
This patch is
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 41 +++++++++++++++++++++--
> 3 files changed, 47 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b2e96682b9bb..83b0c5d86e48 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2055,6 +2055,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> spin_lock_init(&vm->status_lock);
> INIT_LIST_HEAD(&vm->freed);
> INIT_LIST_HEAD(&vm->done);
> + INIT_LIST_HEAD(&vm->pt_freed);
> + INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>
> /* create scheduler entities for page table updates */
> r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
> @@ -2256,6 +2258,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>
> amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
>
> + flush_work(&vm->pt_free_work);
> +
> root = amdgpu_bo_ref(vm->root.bo);
> amdgpu_bo_reserve(root, true);
> amdgpu_vm_set_pasid(adev, vm, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index e6dd112d865c..83acb7bd80fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -278,6 +278,10 @@ struct amdgpu_vm {
> /* BOs which are invalidated, has been updated in the PTs */
> struct list_head done;
>
> + /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
> + struct list_head pt_freed;
> + struct work_struct pt_free_work;
> +
> /* contains the page directory */
> struct amdgpu_vm_bo_base root;
> struct dma_fence *last_update;
> @@ -473,6 +477,7 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
> int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
> uint64_t start, uint64_t end,
> uint64_t dst, uint64_t flags);
> +void amdgpu_vm_pt_free_work(struct work_struct *work);
>
> #if defined(CONFIG_DEBUG_FS)
> void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index 61a4b7182d44..358b91243e37 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -644,6 +644,27 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
> amdgpu_bo_unref(&entry->bo);
> }
>
> +void amdgpu_vm_pt_free_work(struct work_struct *work)
> +{
> + struct amdgpu_vm_bo_base *entry, *next;
> + struct amdgpu_vm *vm;
> + LIST_HEAD(pt_freed);
> +
> + vm = container_of(work, struct amdgpu_vm, pt_free_work);
> +
> + spin_lock(&vm->status_lock);
> + list_splice_init(&vm->pt_freed, &pt_freed);
> + spin_unlock(&vm->status_lock);
> +
> + /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
> + amdgpu_bo_reserve(vm->root.bo, true);
> +
> + list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
> + amdgpu_vm_pt_free(entry);
> +
> + amdgpu_bo_unreserve(vm->root.bo);
> +}
> +
> /**
> * amdgpu_vm_pt_free_dfs - free PD/PT levels
> *
> @@ -655,11 +676,24 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
> */
> static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
> struct amdgpu_vm *vm,
> - struct amdgpu_vm_pt_cursor *start)
> + struct amdgpu_vm_pt_cursor *start,
> + bool unlocked)
> {
> struct amdgpu_vm_pt_cursor cursor;
> struct amdgpu_vm_bo_base *entry;
>
> + if (unlocked) {
> + spin_lock(&vm->status_lock);
> + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> + list_move(&entry->vm_status, &vm->pt_freed);
> +
> + if (start)
> + list_move(&start->entry->vm_status, &vm->pt_freed);
> + spin_unlock(&vm->status_lock);
> + schedule_work(&vm->pt_free_work);
> + return;
> + }
> +
> for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> amdgpu_vm_pt_free(entry);
>
> @@ -676,7 +710,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
> */
> void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> {
> - amdgpu_vm_pt_free_dfs(adev, vm, NULL);
> + amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
> }
>
> /**
> @@ -969,7 +1003,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
> if (cursor.entry->bo) {
> params->table_freed = true;
> amdgpu_vm_pt_free_dfs(adev, params->vm,
> - &cursor);
> + &cursor,
> + params->unlocked);
> }
> amdgpu_vm_pt_next(adev, &cursor);
> }
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v2 7/7] drm/amdgpu: Fix amdgpu_vm_pt_free warning
2022-09-19 17:15 ` [PATCH v2 7/7] drm/amdgpu: Fix amdgpu_vm_pt_free warning Philip Yang
2022-09-20 15:17 ` Felix Kuehling
@ 2022-09-20 15:43 ` Christian König
1 sibling, 0 replies; 11+ messages in thread
From: Christian König @ 2022-09-20 15:43 UTC (permalink / raw)
To: Philip Yang, amd-gfx; +Cc: felix.kuehling, christian.koenig
Am 19.09.22 um 19:15 schrieb Philip Yang:
> Free page table BO from vm resv unlocked context generate below
> warnings.
>
> Add a pt_free_work in vm to free page table BO from vm->pt_freed list.
> pass vm resv unlock status from page table update caller, and add vm_bo
> entry to vm->pt_freed list and schedule the pt_free_work if calling with
> vm resv unlocked.
>
> WARNING: CPU: 12 PID: 3238 at
> drivers/gpu/drm/ttm/ttm_bo.c:106 ttm_bo_set_bulk_move+0xa1/0xc0
> Call Trace:
> amdgpu_vm_pt_free+0x42/0xd0 [amdgpu]
> amdgpu_vm_pt_free_dfs+0xb3/0xf0 [amdgpu]
> amdgpu_vm_ptes_update+0x52d/0x850 [amdgpu]
> amdgpu_vm_update_range+0x2a6/0x640 [amdgpu]
> svm_range_unmap_from_gpus+0x110/0x300 [amdgpu]
> svm_range_cpu_invalidate_pagetables+0x535/0x600 [amdgpu]
> __mmu_notifier_invalidate_range_start+0x1cd/0x230
> unmap_vmas+0x9d/0x140
> unmap_region+0xa8/0x110
>
> Signed-off-by: Philip Yang <Philip.Yang@amd.com>
With Felix comments fixed. Reviewed-by: Christian König
<christian.koenig@amd.com> for the entire series.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 41 +++++++++++++++++++++--
> 3 files changed, 47 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index b2e96682b9bb..83b0c5d86e48 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2055,6 +2055,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> spin_lock_init(&vm->status_lock);
> INIT_LIST_HEAD(&vm->freed);
> INIT_LIST_HEAD(&vm->done);
> + INIT_LIST_HEAD(&vm->pt_freed);
> + INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
>
> /* create scheduler entities for page table updates */
> r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
> @@ -2256,6 +2258,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>
> amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
>
> + flush_work(&vm->pt_free_work);
> +
> root = amdgpu_bo_ref(vm->root.bo);
> amdgpu_bo_reserve(root, true);
> amdgpu_vm_set_pasid(adev, vm, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index e6dd112d865c..83acb7bd80fe 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -278,6 +278,10 @@ struct amdgpu_vm {
> /* BOs which are invalidated, has been updated in the PTs */
> struct list_head done;
>
> + /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */
> + struct list_head pt_freed;
> + struct work_struct pt_free_work;
> +
> /* contains the page directory */
> struct amdgpu_vm_bo_base root;
> struct dma_fence *last_update;
> @@ -473,6 +477,7 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
> int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
> uint64_t start, uint64_t end,
> uint64_t dst, uint64_t flags);
> +void amdgpu_vm_pt_free_work(struct work_struct *work);
>
> #if defined(CONFIG_DEBUG_FS)
> void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> index 61a4b7182d44..358b91243e37 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> @@ -644,6 +644,27 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
> amdgpu_bo_unref(&entry->bo);
> }
>
> +void amdgpu_vm_pt_free_work(struct work_struct *work)
> +{
> + struct amdgpu_vm_bo_base *entry, *next;
> + struct amdgpu_vm *vm;
> + LIST_HEAD(pt_freed);
> +
> + vm = container_of(work, struct amdgpu_vm, pt_free_work);
> +
> + spin_lock(&vm->status_lock);
> + list_splice_init(&vm->pt_freed, &pt_freed);
> + spin_unlock(&vm->status_lock);
> +
> + /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */
> + amdgpu_bo_reserve(vm->root.bo, true);
> +
> + list_for_each_entry_safe(entry, next, &pt_freed, vm_status)
> + amdgpu_vm_pt_free(entry);
> +
> + amdgpu_bo_unreserve(vm->root.bo);
> +}
> +
> /**
> * amdgpu_vm_pt_free_dfs - free PD/PT levels
> *
> @@ -655,11 +676,24 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
> */
> static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
> struct amdgpu_vm *vm,
> - struct amdgpu_vm_pt_cursor *start)
> + struct amdgpu_vm_pt_cursor *start,
> + bool unlocked)
> {
> struct amdgpu_vm_pt_cursor cursor;
> struct amdgpu_vm_bo_base *entry;
>
> + if (unlocked) {
> + spin_lock(&vm->status_lock);
> + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> + list_move(&entry->vm_status, &vm->pt_freed);
> +
> + if (start)
> + list_move(&start->entry->vm_status, &vm->pt_freed);
> + spin_unlock(&vm->status_lock);
> + schedule_work(&vm->pt_free_work);
> + return;
> + }
> +
> for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
> amdgpu_vm_pt_free(entry);
>
> @@ -676,7 +710,7 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
> */
> void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> {
> - amdgpu_vm_pt_free_dfs(adev, vm, NULL);
> + amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
> }
>
> /**
> @@ -969,7 +1003,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
> if (cursor.entry->bo) {
> params->table_freed = true;
> amdgpu_vm_pt_free_dfs(adev, params->vm,
> - &cursor);
> + &cursor,
> + params->unlocked);
> }
> amdgpu_vm_pt_next(adev, &cursor);
> }
^ permalink raw reply [flat|nested] 11+ messages in thread