* [PATCH 1/2] drm/amdkfd: Use mmu_notifier_get
@ 2021-02-12 6:40 ` Felix Kuehling
0 siblings, 0 replies; 6+ messages in thread
From: Felix Kuehling @ 2021-02-12 6:40 UTC (permalink / raw)
To: dri-devel, amd-gfx
We use mmu_notifier_put to free the MMU notifier. That needs to be
paired with mmu_notifier_get to work correctly. Othewrise the next patch
would cause a kernel oops.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 33 +++++++++++++++++++-----
1 file changed, 27 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 2807e1c4d59b..145cd0a17d50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1011,6 +1011,16 @@ static void kfd_process_ref_release(struct kref *ref)
queue_work(kfd_process_wq, &p->release_work);
}
+static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
+{
+ int idx = srcu_read_lock(&kfd_processes_srcu);
+ struct kfd_process *p = find_process_by_mm(mm);
+
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+
+ return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
+}
+
static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
@@ -1075,6 +1085,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
.release = kfd_process_notifier_release,
+ .alloc_notifier = kfd_process_alloc_notifier,
.free_notifier = kfd_process_free_notifier,
};
@@ -1152,6 +1163,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
static struct kfd_process *create_process(const struct task_struct *thread)
{
struct kfd_process *process;
+ struct mmu_notifier *mn;
int err = -ENOMEM;
process = kzalloc(sizeof(*process), GFP_KERNEL);
@@ -1182,19 +1194,28 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (err != 0)
goto err_init_apertures;
- /* Must be last, have to use release destruction after this */
- process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
- err = mmu_notifier_register(&process->mmu_notifier, process->mm);
- if (err)
+ /* alloc_notifier needs to find the process in the hash table */
+ hash_add_rcu(kfd_processes_table, &process->kfd_processes,
+ (uintptr_t)process->mm);
+
+ /* MMU notifier registration must be the last call that can fail
+ * because after this point we cannot unwind the process creation.
+ * After this point, mmu_notifier_put will trigger the cleanup by
+ * dropping the last process reference in the free_notifier.
+ */
+ mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
+ if (IS_ERR(mn)) {
+ err = PTR_ERR(mn);
goto err_register_notifier;
+ }
+ BUG_ON(mn != &process->mmu_notifier);
get_task_struct(process->lead_thread);
- hash_add_rcu(kfd_processes_table, &process->kfd_processes,
- (uintptr_t)process->mm);
return process;
err_register_notifier:
+ hash_del_rcu(&process->kfd_processes);
kfd_process_free_outstanding_kfd_bos(process);
kfd_process_destroy_pdds(process);
err_init_apertures:
--
2.17.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 1/2] drm/amdkfd: Use mmu_notifier_get
@ 2021-02-12 6:40 ` Felix Kuehling
0 siblings, 0 replies; 6+ messages in thread
From: Felix Kuehling @ 2021-02-12 6:40 UTC (permalink / raw)
To: dri-devel, amd-gfx
We use mmu_notifier_put to free the MMU notifier. That needs to be
paired with mmu_notifier_get to work correctly. Othewrise the next patch
would cause a kernel oops.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 33 +++++++++++++++++++-----
1 file changed, 27 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 2807e1c4d59b..145cd0a17d50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1011,6 +1011,16 @@ static void kfd_process_ref_release(struct kref *ref)
queue_work(kfd_process_wq, &p->release_work);
}
+static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
+{
+ int idx = srcu_read_lock(&kfd_processes_srcu);
+ struct kfd_process *p = find_process_by_mm(mm);
+
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+
+ return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
+}
+
static void kfd_process_free_notifier(struct mmu_notifier *mn)
{
kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
@@ -1075,6 +1085,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
.release = kfd_process_notifier_release,
+ .alloc_notifier = kfd_process_alloc_notifier,
.free_notifier = kfd_process_free_notifier,
};
@@ -1152,6 +1163,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
static struct kfd_process *create_process(const struct task_struct *thread)
{
struct kfd_process *process;
+ struct mmu_notifier *mn;
int err = -ENOMEM;
process = kzalloc(sizeof(*process), GFP_KERNEL);
@@ -1182,19 +1194,28 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (err != 0)
goto err_init_apertures;
- /* Must be last, have to use release destruction after this */
- process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
- err = mmu_notifier_register(&process->mmu_notifier, process->mm);
- if (err)
+ /* alloc_notifier needs to find the process in the hash table */
+ hash_add_rcu(kfd_processes_table, &process->kfd_processes,
+ (uintptr_t)process->mm);
+
+ /* MMU notifier registration must be the last call that can fail
+ * because after this point we cannot unwind the process creation.
+ * After this point, mmu_notifier_put will trigger the cleanup by
+ * dropping the last process reference in the free_notifier.
+ */
+ mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
+ if (IS_ERR(mn)) {
+ err = PTR_ERR(mn);
goto err_register_notifier;
+ }
+ BUG_ON(mn != &process->mmu_notifier);
get_task_struct(process->lead_thread);
- hash_add_rcu(kfd_processes_table, &process->kfd_processes,
- (uintptr_t)process->mm);
return process;
err_register_notifier:
+ hash_del_rcu(&process->kfd_processes);
kfd_process_free_outstanding_kfd_bos(process);
kfd_process_destroy_pdds(process);
err_init_apertures:
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails
2021-02-12 6:40 ` Felix Kuehling
@ 2021-02-12 6:40 ` Felix Kuehling
-1 siblings, 0 replies; 6+ messages in thread
From: Felix Kuehling @ 2021-02-12 6:40 UTC (permalink / raw)
To: dri-devel, amd-gfx
If init_cwsr_apu fails, we currently leave the kfd_process structure in
place anyway. The next kfd_open will then succeed, using the existing
kfd_process structure. Fix that by cleaning up the kfd_process after a
failure in init_cwsr_apu.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 145cd0a17d50..a4d7682289bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -775,10 +775,8 @@ struct kfd_process *kfd_create_process(struct file *filep)
goto out;
ret = kfd_process_init_cwsr_apu(process, filep);
- if (ret) {
- process = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto out_destroy;
if (!procfs.kobj)
goto out;
@@ -826,6 +824,14 @@ struct kfd_process *kfd_create_process(struct file *filep)
mutex_unlock(&kfd_processes_mutex);
return process;
+
+out_destroy:
+ hash_del_rcu(&process->kfd_processes);
+ mutex_unlock(&kfd_processes_mutex);
+ synchronize_srcu(&kfd_processes_srcu);
+ /* kfd_process_free_notifier will trigger the cleanup */
+ mmu_notifier_put(&process->mmu_notifier);
+ return ERR_PTR(ret);
}
struct kfd_process *kfd_get_process(const struct task_struct *thread)
--
2.17.1
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails
@ 2021-02-12 6:40 ` Felix Kuehling
0 siblings, 0 replies; 6+ messages in thread
From: Felix Kuehling @ 2021-02-12 6:40 UTC (permalink / raw)
To: dri-devel, amd-gfx
If init_cwsr_apu fails, we currently leave the kfd_process structure in
place anyway. The next kfd_open will then succeed, using the existing
kfd_process structure. Fix that by cleaning up the kfd_process after a
failure in init_cwsr_apu.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 145cd0a17d50..a4d7682289bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -775,10 +775,8 @@ struct kfd_process *kfd_create_process(struct file *filep)
goto out;
ret = kfd_process_init_cwsr_apu(process, filep);
- if (ret) {
- process = ERR_PTR(ret);
- goto out;
- }
+ if (ret)
+ goto out_destroy;
if (!procfs.kobj)
goto out;
@@ -826,6 +824,14 @@ struct kfd_process *kfd_create_process(struct file *filep)
mutex_unlock(&kfd_processes_mutex);
return process;
+
+out_destroy:
+ hash_del_rcu(&process->kfd_processes);
+ mutex_unlock(&kfd_processes_mutex);
+ synchronize_srcu(&kfd_processes_srcu);
+ /* kfd_process_free_notifier will trigger the cleanup */
+ mmu_notifier_put(&process->mmu_notifier);
+ return ERR_PTR(ret);
}
struct kfd_process *kfd_get_process(const struct task_struct *thread)
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails
2021-02-12 6:40 ` Felix Kuehling
@ 2021-02-16 19:50 ` philip yang
-1 siblings, 0 replies; 6+ messages in thread
From: philip yang @ 2021-02-16 19:50 UTC (permalink / raw)
To: Felix Kuehling, dri-devel, amd-gfx
[-- Attachment #1: Type: text/html, Size: 2061 bytes --]
[-- Attachment #2: Type: text/plain, Size: 160 bytes --]
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails
@ 2021-02-16 19:50 ` philip yang
0 siblings, 0 replies; 6+ messages in thread
From: philip yang @ 2021-02-16 19:50 UTC (permalink / raw)
To: Felix Kuehling, dri-devel, amd-gfx
[-- Attachment #1: Type: text/html, Size: 2061 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2021-02-16 19:50 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-12 6:40 [PATCH 1/2] drm/amdkfd: Use mmu_notifier_get Felix Kuehling
2021-02-12 6:40 ` Felix Kuehling
2021-02-12 6:40 ` [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails Felix Kuehling
2021-02-12 6:40 ` Felix Kuehling
2021-02-16 19:50 ` philip yang
2021-02-16 19:50 ` philip yang
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.