From: Sean Christopherson <sean.j.christopherson@intel.com>
To: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Cc: linux-sgx@vger.kernel.org
Subject: [PATCH for_v21 v2 2/2] x86/sgx: Use mmu_notifier.release() instead of per-vma refcounting
Date: Thu, 11 Jul 2019 20:41:38 -0700
Message-ID: <20190712034138.18564-3-sean.j.christopherson@intel.com>
In-Reply-To: <20190712034138.18564-1-sean.j.christopherson@intel.com>

Using per-vma refcounting to track the mm_structs associated with an
enclave requires hooking .vm_close(), which in turn prevents the kernel
from merging enclave vmas (vmas with a .vm_close() hook are never
merged, precisely so that per-vma refcounting stays accurate).

Avoid refcounting encl_mm altogether by registering an mmu_notifier at
.mmap(), removing the dying encl_mm at mmu_notifier.release() and
protecting mm_list during reclaim via a per-enclave SRCU.
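
For reference, the read-side pattern this buys the reclaimer is a plain
SRCU-protected list walk.  A sketch using the fields added by this
series and standard kernel helpers (illustrative, not the literal
reclaim code):

	int srcu_idx;
	struct sgx_encl_mm *encl_mm;

	srcu_idx = srcu_read_lock(&encl->srcu);
	list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
		if (!mmget_not_zero(encl_mm->mm))
			continue;	/* mm is dying, .release() unlinks it */

		/* ... walk this mm's enclave vmas, reclaim EPC pages ... */

		mmput(encl_mm->mm);
	}
	srcu_read_unlock(&encl->srcu, srcu_idx);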

Removing refcounting/vm_close() allows merging of enclave vmas, at the
cost of delaying removal of encl_mm structs from mm_list, i.e. an mm is
disassociated from an enclave when the mm exits or the enclave dies, as
opposed to when the last vma (in a given mm) is closed.

The impact of delaying encl_mm removal is the struct's memory footprint
and whatever overhead is incurred during EPC reclaim (to walk the stale
mm's vmas).
Practically speaking, a stale encl_mm will exist for a meaningful amount
of time if and only if the enclave is mapped in a long-lived process and
then passed off to another long-lived process.  It is expected that the
vast majority of use cases will not encounter this condition, e.g. even
using a daemon to build enclaves should not result in a stale encl_mm as
the builder should never need to mmap() the enclave.

Even if there are scenarios that lead to defunct encl_mms, the cost is
likely far outweighed by the benefits of reducing the number of vmas
across all enclaves.
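
Note, both removal paths (sgx_release() and mmu_notifier.release())
boil down to the classic RCU-list discipline of unlink, wait out
readers, then free, e.g. the sgx_release() flow is effectively:

	spin_lock(&encl->mm_lock);
	list_del_rcu(&encl_mm->list);
	spin_unlock(&encl->mm_lock);

	synchronize_srcu(&encl->srcu);	/* wait out in-flight reclaimers */
	kfree(encl_mm);

The notifier path additionally defers the final kfree() via
mmu_notifier_call_srcu() so that the mmu_notifier itself cannot be
dereferenced after it is freed.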

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/Kconfig                      |  1 +
 arch/x86/kernel/cpu/sgx/driver/main.c | 26 ++++++++
 arch/x86/kernel/cpu/sgx/encl.c        | 89 ++++++++++++---------------
 arch/x86/kernel/cpu/sgx/encl.h        |  5 +-
 4 files changed, 71 insertions(+), 50 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 17558cf48a8a..2957dc59e622 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1919,6 +1919,7 @@ config INTEL_SGX
 	bool "Intel SGX core functionality"
 	depends on X86_64 && CPU_SUP_INTEL
 	select SRCU
+	select MMU_NOTIFIER
 	---help---
 	  Intel(R) SGX is a set of CPU instructions that can be used by
 	  applications to set aside private regions of code and data, referred
diff --git a/arch/x86/kernel/cpu/sgx/driver/main.c b/arch/x86/kernel/cpu/sgx/driver/main.c
index c4f263adf08f..3ee6fd716ce4 100644
--- a/arch/x86/kernel/cpu/sgx/driver/main.c
+++ b/arch/x86/kernel/cpu/sgx/driver/main.c
@@ -53,6 +53,32 @@ static int sgx_open(struct inode *inode, struct file *file)
 static int sgx_release(struct inode *inode, struct file *file)
 {
 	struct sgx_encl *encl = file->private_data;
+	struct sgx_encl_mm *encl_mm;
+
+	/*
+	 * Objects can't be *moved* off an RCU protected list (deletion is ok),
+	 * nor can the object be freed until after synchronize_srcu().
+	 */
+restart:
+	spin_lock(&encl->mm_lock);
+	if (list_empty(&encl->mm_list)) {
+		encl_mm = NULL;
+	} else {
+		encl_mm = list_first_entry(&encl->mm_list, struct sgx_encl_mm,
+					   list);
+		list_del_rcu(&encl_mm->list);
+	}
+	spin_unlock(&encl->mm_lock);
+
+	if (encl_mm) {
+		synchronize_srcu(&encl->srcu);
+
+		mmu_notifier_unregister(&encl_mm->mmu_notifier, encl_mm->mm);
+
+		kfree(encl_mm);
+
+		goto restart;
+	}
 
 	mutex_lock(&encl->lock);
 	encl->flags |= SGX_ENCL_DEAD;
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index a0e58f2af251..abc03934adaf 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -133,45 +133,51 @@ static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
 	return entry;
 }
 
-static void sgx_encl_mm_release_deferred(struct work_struct *work)
+static void sgx_encl_mm_release_deferred(struct rcu_head *rcu)
 {
 	struct sgx_encl_mm *encl_mm =
-		container_of(work, struct sgx_encl_mm, release_work);
+		container_of(rcu, struct sgx_encl_mm, rcu);
 
-	mmdrop(encl_mm->mm);
-	synchronize_srcu(&encl_mm->encl->srcu);
 	kfree(encl_mm);
 }
 
-static void sgx_encl_mm_release(struct kref *ref)
+static void sgx_mmu_notifier_release(struct mmu_notifier *mn,
+				     struct mm_struct *mm)
 {
 	struct sgx_encl_mm *encl_mm =
-		container_of(ref, struct sgx_encl_mm, refcount);
+		container_of(mn, struct sgx_encl_mm, mmu_notifier);
+	struct sgx_encl_mm *tmp = NULL;
 
+	/*
+	 * The enclave itself can remove encl_mm.  Note, objects can't be moved
+	 * off an RCU protected list, but deletion is ok.
+	 */
 	spin_lock(&encl_mm->encl->mm_lock);
-	list_del_rcu(&encl_mm->list);
+	list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) {
+		if (tmp == encl_mm) {
+			list_del_rcu(&encl_mm->list);
+			break;
+		}
+	}
 	spin_unlock(&encl_mm->encl->mm_lock);
 
-	/*
-	 * If the mm has users, then do SRCU synchronization in a worker thread
-	 * to avoid a deadlock with the reclaimer.  The caller holds mmap_sem
-	 * for write, while the reclaimer will acquire mmap_sem for read if it
-	 * is reclaiming from this enclave.  Invoking synchronize_srcu() here
-	 * will hang waiting for the reclaimer to drop its RCU read lock, while
-	 * the reclaimer will get stuck waiting to acquire mmap_sem.  The async
-	 * shenanigans can be avoided if there are no mm users as the reclaimer
-	 * will not acquire mmap_sem in that case.
-	 */
-	if (atomic_read(&encl_mm->mm->mm_users)) {
-		mmgrab(encl_mm->mm);
-		INIT_WORK(&encl_mm->release_work, sgx_encl_mm_release_deferred);
-		schedule_work(&encl_mm->release_work);
-	} else {
+	if (tmp == encl_mm) {
 		synchronize_srcu(&encl_mm->encl->srcu);
-		kfree(encl_mm);
+
+		/*
+		 * Delay freeing encl_mm until after mmu_notifier synchronizes
+		 * its SRCU to ensure encl_mm cannot be dereferenced.
+		 */
+		mmu_notifier_unregister_no_release(mn, mm);
+		mmu_notifier_call_srcu(&encl_mm->rcu,
+				       &sgx_encl_mm_release_deferred);
 	}
 }
 
+static const struct mmu_notifier_ops sgx_mmu_notifier_ops = {
+	.release		= sgx_mmu_notifier_release,
+};
+
 static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
 					    struct mm_struct *mm)
 {
@@ -196,6 +202,7 @@ static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl,
 int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
 {
 	struct sgx_encl_mm *encl_mm;
+	int ret;
 
 	lockdep_assert_held_exclusive(&mm->mmap_sem);
 
@@ -203,12 +210,11 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
 		return -EINVAL;
 
 	/*
-	 * A dying encl_mm can be observed when synchronize_srcu() is called
-	 * asynchronously via sgx_encl_mm_release(), e.g. if mmap() closely
-	 * follows munmap().
+	 * mm_structs are kept on mm_list until the mm or the enclave dies,
+	 * i.e. once an mm is off the list, it's gone for good, therefore it's
+	 * impossible to get a false positive on @mm due to a stale mm_list.
 	 */
-	encl_mm = sgx_encl_find_mm(encl, mm);
-	if (encl_mm && kref_get_unless_zero(&encl_mm->refcount))
+	if (sgx_encl_find_mm(encl, mm))
 		return 0;
 
 	encl_mm = kzalloc(sizeof(*encl_mm), GFP_KERNEL);
@@ -217,18 +223,18 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
 
 	encl_mm->encl = encl;
 	encl_mm->mm = mm;
-	kref_init(&encl_mm->refcount);
+	encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops;
+
+	ret = __mmu_notifier_register(&encl_mm->mmu_notifier, mm);
+	if (ret) {
+		kfree(encl_mm);
+		return ret;
+	}
 
 	spin_lock(&encl->mm_lock);
 	list_add_rcu(&encl_mm->list, &encl->mm_list);
 	spin_unlock(&encl->mm_lock);
 
-	/*
-	 * Note, in addition to ensuring reclaim sees all encl_mms that have a
-	 * valid mapping, this synchronize_srcu() also ensures that at most one
-	 * matching encl_mm is encountered by the refcouting flows, i.e. a live
-	 * encl_mm isn't hiding behind a dying encl_mm.
-	 */
 	synchronize_srcu(&encl->srcu);
 
 	return 0;
@@ -245,18 +251,6 @@ static void sgx_vma_open(struct vm_area_struct *vma)
 		vma->vm_private_data = NULL;
 }
 
-static void sgx_vma_close(struct vm_area_struct *vma)
-{
-	struct sgx_encl *encl = vma->vm_private_data;
-	struct sgx_encl_mm *encl_mm;
-
-	if (!encl)
-		return;
-
-	encl_mm = sgx_encl_find_mm(encl, vma->vm_mm);
-	kref_put(&encl_mm->refcount, sgx_encl_mm_release);
-}
-
 static unsigned int sgx_vma_fault(struct vm_fault *vmf)
 {
 	unsigned long addr = (unsigned long)vmf->address;
@@ -440,7 +434,6 @@ static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr,
 }
 
 const struct vm_operations_struct sgx_vm_ops = {
-	.close = sgx_vma_close,
 	.open = sgx_vma_open,
 	.fault = sgx_vma_fault,
 	.may_mprotect = sgx_vma_mprotect,
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index f1d01a83d9bc..8d101b85b06a 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -9,6 +9,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
 #include <linux/mutex.h>
 #include <linux/notifier.h>
 #include <linux/radix-tree.h>
@@ -58,9 +59,9 @@ enum sgx_encl_flags {
 struct sgx_encl_mm {
 	struct sgx_encl *encl;
 	struct mm_struct *mm;
-	struct kref refcount;
 	struct list_head list;
-	struct work_struct release_work;
+	struct mmu_notifier mmu_notifier;
+	struct rcu_head rcu;
 };
 
 struct sgx_encl {
-- 
2.22.0


Thread overview: 5+ messages
2019-07-12  3:41 [PATCH for_v21 v2 0/2] x86/sgx: Use SRCU and mmu_notifier Sean Christopherson
2019-07-12  3:41 ` [PATCH for_v21 v2 1/2] x86/sgx: Use SRCU to protect mm_list during reclaim Sean Christopherson
2019-07-12  5:02   ` Jarkko Sakkinen
2019-07-12  3:41 ` Sean Christopherson [this message]
2019-07-12  4:20 ` [PATCH for_v21 v2 0/2] x86/sgx: Use SRCU and mmu_notifier Jarkko Sakkinen
