All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amdkfd: Use mmu_notifier_get
@ 2021-02-12  6:40 ` Felix Kuehling
  0 siblings, 0 replies; 6+ messages in thread
From: Felix Kuehling @ 2021-02-12  6:40 UTC (permalink / raw)
  To: dri-devel, amd-gfx

We use mmu_notifier_put to free the MMU notifier. That needs to be
paired with mmu_notifier_get to work correctly. Othewrise the next patch
would cause a kernel oops.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 33 +++++++++++++++++++-----
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 2807e1c4d59b..145cd0a17d50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1011,6 +1011,16 @@ static void kfd_process_ref_release(struct kref *ref)
 	queue_work(kfd_process_wq, &p->release_work);
 }
 
+static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
+{
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+	struct kfd_process *p = find_process_by_mm(mm);
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
+}
+
 static void kfd_process_free_notifier(struct mmu_notifier *mn)
 {
 	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
@@ -1075,6 +1085,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 
 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
 	.release = kfd_process_notifier_release,
+	.alloc_notifier = kfd_process_alloc_notifier,
 	.free_notifier = kfd_process_free_notifier,
 };
 
@@ -1152,6 +1163,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
 static struct kfd_process *create_process(const struct task_struct *thread)
 {
 	struct kfd_process *process;
+	struct mmu_notifier *mn;
 	int err = -ENOMEM;
 
 	process = kzalloc(sizeof(*process), GFP_KERNEL);
@@ -1182,19 +1194,28 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (err != 0)
 		goto err_init_apertures;
 
-	/* Must be last, have to use release destruction after this */
-	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
-	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
-	if (err)
+	/* alloc_notifier needs to find the process in the hash table */
+	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
+			(uintptr_t)process->mm);
+
+	/* MMU notifier registration must be the last call that can fail
+	 * because after this point we cannot unwind the process creation.
+	 * After this point, mmu_notifier_put will trigger the cleanup by
+	 * dropping the last process reference in the free_notifier.
+	 */
+	mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
+	if (IS_ERR(mn)) {
+		err = PTR_ERR(mn);
 		goto err_register_notifier;
+	}
+	BUG_ON(mn != &process->mmu_notifier);
 
 	get_task_struct(process->lead_thread);
-	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
-			(uintptr_t)process->mm);
 
 	return process;
 
 err_register_notifier:
+	hash_del_rcu(&process->kfd_processes);
 	kfd_process_free_outstanding_kfd_bos(process);
 	kfd_process_destroy_pdds(process);
 err_init_apertures:
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 1/2] drm/amdkfd: Use mmu_notifier_get
@ 2021-02-12  6:40 ` Felix Kuehling
  0 siblings, 0 replies; 6+ messages in thread
From: Felix Kuehling @ 2021-02-12  6:40 UTC (permalink / raw)
  To: dri-devel, amd-gfx

We use mmu_notifier_put to free the MMU notifier. That needs to be
paired with mmu_notifier_get to work correctly. Othewrise the next patch
would cause a kernel oops.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 33 +++++++++++++++++++-----
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 2807e1c4d59b..145cd0a17d50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1011,6 +1011,16 @@ static void kfd_process_ref_release(struct kref *ref)
 	queue_work(kfd_process_wq, &p->release_work);
 }
 
+static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm)
+{
+	int idx = srcu_read_lock(&kfd_processes_srcu);
+	struct kfd_process *p = find_process_by_mm(mm);
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
+
+	return p ? &p->mmu_notifier : ERR_PTR(-ESRCH);
+}
+
 static void kfd_process_free_notifier(struct mmu_notifier *mn)
 {
 	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
@@ -1075,6 +1085,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 
 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
 	.release = kfd_process_notifier_release,
+	.alloc_notifier = kfd_process_alloc_notifier,
 	.free_notifier = kfd_process_free_notifier,
 };
 
@@ -1152,6 +1163,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
 static struct kfd_process *create_process(const struct task_struct *thread)
 {
 	struct kfd_process *process;
+	struct mmu_notifier *mn;
 	int err = -ENOMEM;
 
 	process = kzalloc(sizeof(*process), GFP_KERNEL);
@@ -1182,19 +1194,28 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (err != 0)
 		goto err_init_apertures;
 
-	/* Must be last, have to use release destruction after this */
-	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
-	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
-	if (err)
+	/* alloc_notifier needs to find the process in the hash table */
+	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
+			(uintptr_t)process->mm);
+
+	/* MMU notifier registration must be the last call that can fail
+	 * because after this point we cannot unwind the process creation.
+	 * After this point, mmu_notifier_put will trigger the cleanup by
+	 * dropping the last process reference in the free_notifier.
+	 */
+	mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
+	if (IS_ERR(mn)) {
+		err = PTR_ERR(mn);
 		goto err_register_notifier;
+	}
+	BUG_ON(mn != &process->mmu_notifier);
 
 	get_task_struct(process->lead_thread);
-	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
-			(uintptr_t)process->mm);
 
 	return process;
 
 err_register_notifier:
+	hash_del_rcu(&process->kfd_processes);
 	kfd_process_free_outstanding_kfd_bos(process);
 	kfd_process_destroy_pdds(process);
 err_init_apertures:
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails
  2021-02-12  6:40 ` Felix Kuehling
@ 2021-02-12  6:40   ` Felix Kuehling
  -1 siblings, 0 replies; 6+ messages in thread
From: Felix Kuehling @ 2021-02-12  6:40 UTC (permalink / raw)
  To: dri-devel, amd-gfx

If init_cwsr_apu fails, we currently leave the kfd_process structure in
place anyway. The next kfd_open will then succeed, using the existing
kfd_process structure. Fix that by cleaning up the kfd_process after a
failure in init_cwsr_apu.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 145cd0a17d50..a4d7682289bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -775,10 +775,8 @@ struct kfd_process *kfd_create_process(struct file *filep)
 			goto out;
 
 		ret = kfd_process_init_cwsr_apu(process, filep);
-		if (ret) {
-			process = ERR_PTR(ret);
-			goto out;
-		}
+		if (ret)
+			goto out_destroy;
 
 		if (!procfs.kobj)
 			goto out;
@@ -826,6 +824,14 @@ struct kfd_process *kfd_create_process(struct file *filep)
 	mutex_unlock(&kfd_processes_mutex);
 
 	return process;
+
+out_destroy:
+	hash_del_rcu(&process->kfd_processes);
+	mutex_unlock(&kfd_processes_mutex);
+	synchronize_srcu(&kfd_processes_srcu);
+	/* kfd_process_free_notifier will trigger the cleanup */
+	mmu_notifier_put(&process->mmu_notifier);
+	return ERR_PTR(ret);
 }
 
 struct kfd_process *kfd_get_process(const struct task_struct *thread)
-- 
2.17.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails
@ 2021-02-12  6:40   ` Felix Kuehling
  0 siblings, 0 replies; 6+ messages in thread
From: Felix Kuehling @ 2021-02-12  6:40 UTC (permalink / raw)
  To: dri-devel, amd-gfx

If init_cwsr_apu fails, we currently leave the kfd_process structure in
place anyway. The next kfd_open will then succeed, using the existing
kfd_process structure. Fix that by cleaning up the kfd_process after a
failure in init_cwsr_apu.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 145cd0a17d50..a4d7682289bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -775,10 +775,8 @@ struct kfd_process *kfd_create_process(struct file *filep)
 			goto out;
 
 		ret = kfd_process_init_cwsr_apu(process, filep);
-		if (ret) {
-			process = ERR_PTR(ret);
-			goto out;
-		}
+		if (ret)
+			goto out_destroy;
 
 		if (!procfs.kobj)
 			goto out;
@@ -826,6 +824,14 @@ struct kfd_process *kfd_create_process(struct file *filep)
 	mutex_unlock(&kfd_processes_mutex);
 
 	return process;
+
+out_destroy:
+	hash_del_rcu(&process->kfd_processes);
+	mutex_unlock(&kfd_processes_mutex);
+	synchronize_srcu(&kfd_processes_srcu);
+	/* kfd_process_free_notifier will trigger the cleanup */
+	mmu_notifier_put(&process->mmu_notifier);
+	return ERR_PTR(ret);
 }
 
 struct kfd_process *kfd_get_process(const struct task_struct *thread)
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails
  2021-02-12  6:40   ` Felix Kuehling
@ 2021-02-16 19:50     ` philip yang
  -1 siblings, 0 replies; 6+ messages in thread
From: philip yang @ 2021-02-16 19:50 UTC (permalink / raw)
  To: Felix Kuehling, dri-devel, amd-gfx

[-- Attachment #1: Type: text/html, Size: 2061 bytes --]

[-- Attachment #2: Type: text/plain, Size: 160 bytes --]

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails
@ 2021-02-16 19:50     ` philip yang
  0 siblings, 0 replies; 6+ messages in thread
From: philip yang @ 2021-02-16 19:50 UTC (permalink / raw)
  To: Felix Kuehling, dri-devel, amd-gfx

[-- Attachment #1: Type: text/html, Size: 2061 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-02-16 19:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-12  6:40 [PATCH 1/2] drm/amdkfd: Use mmu_notifier_get Felix Kuehling
2021-02-12  6:40 ` Felix Kuehling
2021-02-12  6:40 ` [PATCH 2/2] drm/amdkfd: Cleanup kfd_process if init_cwsr_apu fails Felix Kuehling
2021-02-12  6:40   ` Felix Kuehling
2021-02-16 19:50   ` philip yang
2021-02-16 19:50     ` philip yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.