iommu.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: Jason Gunthorpe <jgg@ziepe.ca>
To: linux-mm@kvack.org
Cc: "Andrea Arcangeli" <aarcange@redhat.com>,
	"David (ChunMing) Zhou" <David1.Zhou@amd.com>,
	"Ralph Campbell" <rcampbell@nvidia.com>,
	"Dimitri Sivanich" <sivanich@sgi.com>,
	"Gavin Shan" <shangw@linux.vnet.ibm.com>,
	"Andrea Righi" <andrea@betterlinux.com>,
	linux-rdma@vger.kernel.org, "John Hubbard" <jhubbard@nvidia.com>,
	"Kuehling, Felix" <Felix.Kuehling@amd.com>,
	linux-kernel@vger.kernel.org, dri-devel@lists.freedesktop.org,
	"Christian König" <christian.koenig@amd.com>,
	"Jérôme Glisse" <jglisse@redhat.com>,
	iommu@lists.linux-foundation.org, amd-gfx@lists.freedesktop.org,
	"Jason Gunthorpe" <jgg@mellanox.com>,
	"Alex Deucher" <alexander.deucher@amd.com>,
	intel-gfx@lists.freedesktop.org, "Christoph Hellwig" <hch@lst.de>
Subject: [PATCH v3 hmm 09/11] drm/amdkfd: fix a use after free race with mmu_notifer unregister
Date: Tue,  6 Aug 2019 20:15:46 -0300	[thread overview]
Message-ID: <20190806231548.25242-10-jgg@ziepe.ca> (raw)
In-Reply-To: <20190806231548.25242-1-jgg@ziepe.ca>

From: Jason Gunthorpe <jgg@mellanox.com>

When using mmu_notifer_unregister_no_release() the caller must ensure
there is a SRCU synchronize before the mn memory is freed, otherwise use
after free races are possible, for instance:

     CPU0                                      CPU1
                                      invalidate_range_start
                                         hlist_for_each_entry_rcu(..)
 mmu_notifier_unregister_no_release(&p->mn)
 kfree(mn)
                                      if (mn->ops->invalidate_range_end)

The error unwind in amdkfd misses the SRCU synchronization.

amdkfd keeps the kfd_process around until the mm is released, so split the
flow to fully initialize the kfd_process and register it for find_process,
and with the notifier. Past this point the kfd_process does not need to be
cleaned up as it is fully ready.

The final failable step does a vm_mmap() and does not seem to impact the
kfd_process global state. Since it also cannot be undone (and already has
problems with undo if it internally fails), it has to be last.

This way we don't have to try to unwind the mmu_notifier_register() and
avoid the problem with the SRCU.

Along the way this also fixes various other error unwind bugs in the flow.

Fixes: 45102048f77e ("amdkfd: Add process queue manager module")
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 78 +++++++++++-------------
 1 file changed, 37 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 8f1076c0c88a25..c06e6190f21ffa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -62,8 +62,8 @@ static struct workqueue_struct *kfd_restore_wq;
 
 static struct kfd_process *find_process(const struct task_struct *thread);
 static void kfd_process_ref_release(struct kref *ref);
-static struct kfd_process *create_process(const struct task_struct *thread,
-					struct file *filep);
+static struct kfd_process *create_process(const struct task_struct *thread);
+static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
 
 static void evict_process_worker(struct work_struct *work);
 static void restore_process_worker(struct work_struct *work);
@@ -289,7 +289,15 @@ struct kfd_process *kfd_create_process(struct file *filep)
 	if (process) {
 		pr_debug("Process already found\n");
 	} else {
-		process = create_process(thread, filep);
+		process = create_process(thread);
+		if (IS_ERR(process))
+			goto out;
+
+		ret = kfd_process_init_cwsr_apu(process, filep);
+		if (ret) {
+			process = ERR_PTR(ret);
+			goto out;
+		}
 
 		if (!procfs.kobj)
 			goto out;
@@ -609,81 +617,69 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
 	return 0;
 }
 
-static struct kfd_process *create_process(const struct task_struct *thread,
-					struct file *filep)
+/*
+ * On return the kfd_process is fully operational and will be freed when the
+ * mm is released
+ */
+static struct kfd_process *create_process(const struct task_struct *thread)
 {
 	struct kfd_process *process;
 	int err = -ENOMEM;
 
 	process = kzalloc(sizeof(*process), GFP_KERNEL);
-
 	if (!process)
 		goto err_alloc_process;
 
-	process->pasid = kfd_pasid_alloc();
-	if (process->pasid == 0)
-		goto err_alloc_pasid;
-
-	if (kfd_alloc_process_doorbells(process) < 0)
-		goto err_alloc_doorbells;
-
 	kref_init(&process->ref);
-
 	mutex_init(&process->mutex);
-
 	process->mm = thread->mm;
-
-	/* register notifier */
-	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
-	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
-	if (err)
-		goto err_mmu_notifier;
-
-	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
-			(uintptr_t)process->mm);
-
 	process->lead_thread = thread->group_leader;
-	get_task_struct(process->lead_thread);
-
 	INIT_LIST_HEAD(&process->per_device_data);
-
+	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
+	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
+	process->last_restore_timestamp = get_jiffies_64();
 	kfd_event_init_process(process);
+	process->is_32bit_user_mode = in_compat_syscall();
+
+	process->pasid = kfd_pasid_alloc();
+	if (process->pasid == 0)
+		goto err_alloc_pasid;
+
+	if (kfd_alloc_process_doorbells(process) < 0)
+		goto err_alloc_doorbells;
 
 	err = pqm_init(&process->pqm, process);
 	if (err != 0)
 		goto err_process_pqm_init;
 
 	/* init process apertures*/
-	process->is_32bit_user_mode = in_compat_syscall();
 	err = kfd_init_apertures(process);
 	if (err != 0)
 		goto err_init_apertures;
 
-	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
-	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
-	process->last_restore_timestamp = get_jiffies_64();
-
-	err = kfd_process_init_cwsr_apu(process, filep);
+	/* Must be last, have to use release destruction after this */
+	process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops;
+	err = mmu_notifier_register(&process->mmu_notifier, process->mm);
 	if (err)
-		goto err_init_cwsr;
+		goto err_register_notifier;
+
+	get_task_struct(process->lead_thread);
+	hash_add_rcu(kfd_processes_table, &process->kfd_processes,
+			(uintptr_t)process->mm);
 
 	return process;
 
-err_init_cwsr:
+err_register_notifier:
 	kfd_process_free_outstanding_kfd_bos(process);
 	kfd_process_destroy_pdds(process);
 err_init_apertures:
 	pqm_uninit(&process->pqm);
 err_process_pqm_init:
-	hash_del_rcu(&process->kfd_processes);
-	synchronize_rcu();
-	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
-err_mmu_notifier:
-	mutex_destroy(&process->mutex);
 	kfd_free_process_doorbells(process);
 err_alloc_doorbells:
 	kfd_pasid_free(process->pasid);
 err_alloc_pasid:
+	mutex_destroy(&process->mutex);
 	kfree(process);
 err_alloc_process:
 	return ERR_PTR(err);
-- 
2.22.0

_______________________________________________
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

  parent reply	other threads:[~2019-08-06 23:17 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-06 23:15 [PATCH v3 hmm 00/11] Add mmu_notifier_get/put for managing mmu notifier registrations Jason Gunthorpe
2019-08-06 23:15 ` [PATCH v3 hmm 01/11] mm/mmu_notifiers: hoist do_mmu_notifier_register down_write to the caller Jason Gunthorpe
2019-08-08 10:24   ` Christoph Hellwig
2019-08-14 20:14   ` Ralph Campbell
2019-08-06 23:15 ` [PATCH v3 hmm 02/11] mm/mmu_notifiers: do not speculatively allocate a mmu_notifier_mm Jason Gunthorpe
2019-08-08 10:26   ` Christoph Hellwig
2019-08-14 20:32   ` Ralph Campbell
2019-08-06 23:15 ` [PATCH v3 hmm 03/11] mm/mmu_notifiers: add a get/put scheme for the registration Jason Gunthorpe
2019-08-14 21:20   ` Ralph Campbell
2019-08-15  0:13     ` Jason Gunthorpe
2019-08-06 23:15 ` [PATCH v3 hmm 04/11] misc/sgi-gru: use mmu_notifier_get/put for struct gru_mm_struct Jason Gunthorpe
2019-08-08 10:25   ` Christoph Hellwig
2019-08-14 15:58     ` Jason Gunthorpe
2019-08-14 17:18       ` Dimitri Sivanich
2019-08-06 23:15 ` [PATCH v3 hmm 05/11] hmm: use mmu_notifier_get/put for 'struct hmm' Jason Gunthorpe
2019-08-08 10:28   ` Christoph Hellwig
2019-08-14 21:51   ` Ralph Campbell
2019-08-06 23:15 ` [PATCH v3 hmm 06/11] RDMA/odp: use mmu_notifier_get/put for 'struct ib_ucontext_per_mm' Jason Gunthorpe
2019-08-06 23:15 ` [PATCH v3 hmm 07/11] RDMA/odp: remove ib_ucontext from ib_umem Jason Gunthorpe
2019-08-06 23:15 ` [PATCH v3 hmm 08/11] drm/radeon: use mmu_notifier_get/put for struct radeon_mn Jason Gunthorpe
2019-08-14 16:07   ` Jason Gunthorpe
2019-08-15  8:28   ` Christian König
2019-08-15 19:46     ` Jason Gunthorpe
2019-08-06 23:15 ` Jason Gunthorpe [this message]
2019-08-06 23:15 ` [PATCH v3 hmm 10/11] drm/amdkfd: use mmu_notifier_put Jason Gunthorpe
2019-08-06 23:47   ` Kuehling, Felix
2019-08-07 11:42     ` Jason Gunthorpe
2019-08-06 23:15 ` [PATCH v3 hmm 11/11] mm/mmu_notifiers: remove unregister_no_release Jason Gunthorpe
2019-08-08 10:29   ` Christoph Hellwig
2019-08-14 21:53   ` Ralph Campbell
2019-08-14 23:56 ` [PATCH v3 hmm 00/11] Add mmu_notifier_get/put for managing mmu notifier registrations Ralph Campbell
2019-08-16 15:14 ` Jason Gunthorpe
2019-08-21 19:53 ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190806231548.25242-10-jgg@ziepe.ca \
    --to=jgg@ziepe.ca \
    --cc=David1.Zhou@amd.com \
    --cc=Felix.Kuehling@amd.com \
    --cc=aarcange@redhat.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=andrea@betterlinux.com \
    --cc=christian.koenig@amd.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=hch@lst.de \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=iommu@lists.linux-foundation.org \
    --cc=jgg@mellanox.com \
    --cc=jglisse@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=rcampbell@nvidia.com \
    --cc=shangw@linux.vnet.ibm.com \
    --cc=sivanich@sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).