All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kristen Carlson Accardi <kristen@linux.intel.com>
To: jarkko@kernel.org, dave.hansen@linux.kernel.org, tj@kernel.org,
	linux-kernel@vger.kernel.org, linux-sgx@vger.kernel.org,
	cgroups@vger.kernel.org,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>
Cc: zhiquan1.li@intel.com,
	Kristen Carlson Accardi <kristen@linux.intel.com>,
	Sean Christopherson <seanjc@google.com>
Subject: [PATCH 05/26] x86/sgx: Track epc pages on reclaimable or unreclaimable lists
Date: Fri, 11 Nov 2022 10:35:10 -0800	[thread overview]
Message-ID: <20221111183532.3676646-6-kristen@linux.intel.com> (raw)
In-Reply-To: <20221111183532.3676646-1-kristen@linux.intel.com>

Replace functions sgx_mark_page_reclaimable() and
sgx_unmark_page_reclaimable() with sgx_record_epc_page() and
sgx_drop_epc_page(). sgx_record_epc_page() will add the epc_page
to the correct "reclaimable" or "unreclaimable" list in the
sgx_epc_lru struct. sgx_drop_epc_page() will delete the page
from the sgx_epc_lru list. Tracking pages that are not tracked by
the reclaimer in the LRU's "unreclaimable" list allows an OOM event
to cause all the pages in use by an enclave to be freed, regardless
of whether they were reclaimable pages or not.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Kristen Carlson Accardi <kristen@linux.intel.com>
Cc: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kernel/cpu/sgx/encl.c  | 10 +++++++---
 arch/x86/kernel/cpu/sgx/ioctl.c | 11 +++++++----
 arch/x86/kernel/cpu/sgx/main.c  | 26 +++++++++++++++-----------
 arch/x86/kernel/cpu/sgx/sgx.h   |  4 ++--
 arch/x86/kernel/cpu/sgx/virt.c  | 28 ++++++++++++++++++++--------
 5 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 4eaf9d21e71b..4683da9ef4f1 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -252,6 +252,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
 		epc_page = sgx_encl_eldu(&encl->secs, NULL);
 		if (IS_ERR(epc_page))
 			return ERR_CAST(epc_page);
+		sgx_record_epc_page(epc_page, 0);
 	}
 
 	epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
@@ -259,7 +260,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
 		return ERR_CAST(epc_page);
 
 	encl->secs_child_cnt++;
-	sgx_mark_page_reclaimable(entry->epc_page);
+	sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 
 	return entry;
 }
@@ -375,7 +376,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	encl_page->type = SGX_PAGE_TYPE_REG;
 	encl->secs_child_cnt++;
 
-	sgx_mark_page_reclaimable(encl_page->epc_page);
+	sgx_record_epc_page(encl_page->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 
 	phys_addr = sgx_get_epc_phys_addr(epc_page);
 	/*
@@ -687,7 +688,7 @@ void sgx_encl_release(struct kref *ref)
 			 * The page and its radix tree entry cannot be freed
 			 * if the page is being held by the reclaimer.
 			 */
-			if (sgx_unmark_page_reclaimable(entry->epc_page))
+			if (sgx_drop_epc_page(entry->epc_page))
 				continue;
 
 			sgx_encl_free_epc_page(entry->epc_page);
@@ -703,6 +704,7 @@ void sgx_encl_release(struct kref *ref)
 	xa_destroy(&encl->page_array);
 
 	if (!encl->secs_child_cnt && encl->secs.epc_page) {
+		sgx_drop_epc_page(encl->secs.epc_page);
 		sgx_encl_free_epc_page(encl->secs.epc_page);
 		encl->secs.epc_page = NULL;
 	}
@@ -711,6 +713,7 @@ void sgx_encl_release(struct kref *ref)
 		va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
 					   list);
 		list_del(&va_page->list);
+		sgx_drop_epc_page(va_page->epc_page);
 		sgx_encl_free_epc_page(va_page->epc_page);
 		kfree(va_page);
 	}
@@ -1218,6 +1221,7 @@ struct sgx_epc_page *sgx_alloc_va_page(struct sgx_encl *encl, bool reclaim)
 		sgx_encl_free_epc_page(epc_page);
 		return ERR_PTR(-EFAULT);
 	}
+	sgx_record_epc_page(epc_page, 0);
 
 	return epc_page;
 }
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 9a1bb3c3211a..aca80a3f38a1 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -48,6 +48,7 @@ void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
 	encl->page_cnt--;
 
 	if (va_page) {
+		sgx_drop_epc_page(va_page->epc_page);
 		sgx_encl_free_epc_page(va_page->epc_page);
 		list_del(&va_page->list);
 		kfree(va_page);
@@ -113,6 +114,8 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
 	encl->attributes = secs->attributes;
 	encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
 
+	sgx_record_epc_page(encl->secs.epc_page, 0);
+
 	/* Set only after completion, as encl->lock has not been taken. */
 	set_bit(SGX_ENCL_CREATED, &encl->flags);
 
@@ -322,7 +325,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
 			goto err_out;
 	}
 
-	sgx_mark_page_reclaimable(encl_page->epc_page);
+	sgx_record_epc_page(encl_page->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 	mutex_unlock(&encl->lock);
 	mmap_read_unlock(current->mm);
 	return ret;
@@ -958,7 +961,7 @@ static long sgx_enclave_modify_types(struct sgx_encl *encl,
 			 * Prevent page from being reclaimed while mutex
 			 * is released.
 			 */
-			if (sgx_unmark_page_reclaimable(entry->epc_page)) {
+			if (sgx_drop_epc_page(entry->epc_page)) {
 				ret = -EAGAIN;
 				goto out_entry_changed;
 			}
@@ -973,7 +976,7 @@ static long sgx_enclave_modify_types(struct sgx_encl *encl,
 
 			mutex_lock(&encl->lock);
 
-			sgx_mark_page_reclaimable(entry->epc_page);
+			sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 		}
 
 		/* Change EPC type */
@@ -1130,7 +1133,7 @@ static long sgx_encl_remove_pages(struct sgx_encl *encl,
 			goto out_unlock;
 		}
 
-		if (sgx_unmark_page_reclaimable(entry->epc_page)) {
+		if (sgx_drop_epc_page(entry->epc_page)) {
 			ret = -EBUSY;
 			goto out_unlock;
 		}
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index aa938e4d4a73..3b09433ffd85 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -262,7 +262,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
 			goto out;
 
 		sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
-
+		sgx_drop_epc_page(encl->secs.epc_page);
 		sgx_encl_free_epc_page(encl->secs.epc_page);
 		encl->secs.epc_page = NULL;
 
@@ -499,31 +499,35 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
 }
 
 /**
- * sgx_mark_page_reclaimable() - Mark a page as reclaimable
+ * sgx_record_epc_page() - Add a page to the LRU tracking
  * @page:	EPC page
  *
- * Mark a page as reclaimable and add it to the active page list. Pages
- * are automatically removed from the active list when freed.
+ * Mark a page with the specified flags and add it to the appropriate
+ * (un)reclaimable list.
  */
-void sgx_mark_page_reclaimable(struct sgx_epc_page *page)
+void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
 {
 	spin_lock(&sgx_global_lru.lock);
-	page->flags |= SGX_EPC_PAGE_RECLAIMER_TRACKED;
-	sgx_epc_push_reclaimable(&sgx_global_lru, page);
+	WARN_ON(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
+	page->flags |= flags;
+	if (flags & SGX_EPC_PAGE_RECLAIMER_TRACKED)
+		sgx_epc_push_reclaimable(&sgx_global_lru, page);
+	else
+		sgx_epc_push_unreclaimable(&sgx_global_lru, page);
 	spin_unlock(&sgx_global_lru.lock);
 }
 
 /**
- * sgx_unmark_page_reclaimable() - Remove a page from the reclaim list
+ * sgx_drop_epc_page() - Remove a page from a LRU list
  * @page:	EPC page
  *
- * Clear the reclaimable flag and remove the page from the active page list.
+ * Clear the reclaimable flag if set and remove the page from its LRU.
  *
  * Return:
  *   0 on success,
  *   -EBUSY if the page is in the process of being reclaimed
  */
-int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
+int sgx_drop_epc_page(struct sgx_epc_page *page)
 {
 	spin_lock(&sgx_global_lru.lock);
 	if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
@@ -533,9 +537,9 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
 			return -EBUSY;
 		}
 
-		list_del(&page->list);
 		page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
 	}
+	list_del(&page->list);
 	spin_unlock(&sgx_global_lru.lock);
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index aac7d4feb0fa..969606615211 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -140,8 +140,8 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void);
 void sgx_free_epc_page(struct sgx_epc_page *page);
 
 void sgx_reclaim_direct(void);
-void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
-int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
+void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags);
+int sgx_drop_epc_page(struct sgx_epc_page *page);
 struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
 
 void sgx_ipi_cb(void *info);
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index 776ae5c1c032..0eabc4db91d0 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -64,6 +64,8 @@ static int __sgx_vepc_fault(struct sgx_vepc *vepc,
 		goto err_delete;
 	}
 
+	sgx_record_epc_page(epc_page, 0);
+
 	return 0;
 
 err_delete:
@@ -148,6 +150,7 @@ static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
 		return ret;
 	}
 
+	sgx_drop_epc_page(epc_page);
 	sgx_free_epc_page(epc_page);
 	return 0;
 }
@@ -220,8 +223,15 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
 		 * have been removed, the SECS page must have a child on
 		 * another instance.
 		 */
-		if (sgx_vepc_free_page(epc_page))
+		if (sgx_vepc_free_page(epc_page)) {
+			/*
+			 * Drop the page before adding it to the list of SECS
+			 * pages.  Moving the page off the unreclaimable list
+			 * needs to be done under the LRU's spinlock.
+			 */
+			sgx_drop_epc_page(epc_page);
 			list_add_tail(&epc_page->list, &secs_pages);
+		}
 
 		xa_erase(&vepc->page_array, index);
 	}
@@ -236,15 +246,17 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
 	mutex_lock(&zombie_secs_pages_lock);
 	list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
 		/*
-		 * Speculatively remove the page from the list of zombies,
-		 * if the page is successfully EREMOVE'd it will be added to
-		 * the list of free pages.  If EREMOVE fails, throw the page
-		 * on the local list, which will be spliced on at the end.
+		 * If EREMOVE fails, throw the page on the local list, which
+		 * will be spliced on at the end.
+		 *
+		 * Note, this abuses sgx_drop_epc_page() to delete the page off
+		 * the list of zombies, but this is a very rare path (probably
+		 * never hit in production).  It's not worth special casing the
+		 * free path for this super rare case just to avoid taking the
+		 * LRU's spinlock.
 		 */
-		list_del(&epc_page->list);
-
 		if (sgx_vepc_free_page(epc_page))
-			list_add_tail(&epc_page->list, &secs_pages);
+			list_move_tail(&epc_page->list, &secs_pages);
 	}
 
 	if (!list_empty(&secs_pages))
-- 
2.37.3


WARNING: multiple messages have this Message-ID (diff)
From: Kristen Carlson Accardi <kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
To: jarkko-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	dave.hansen-CPWUtch7KCBzeIdxy0IIJw@public.gmane.org,
	tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-sgx-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Dave Hansen <dave.hansen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>,
	Thomas Gleixner <tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org>,
	Ingo Molnar <mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	Borislav Petkov <bp-Gina5bIWoIWzQB+pC5nmwQ@public.gmane.org>,
	x86-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	"H. Peter Anvin" <hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org>
Cc: zhiquan1.li-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org,
	Kristen Carlson Accardi
	<kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>,
	Sean Christopherson
	<seanjc-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Subject: [PATCH 05/26] x86/sgx: Track epc pages on reclaimable or unreclaimable lists
Date: Fri, 11 Nov 2022 10:35:10 -0800	[thread overview]
Message-ID: <20221111183532.3676646-6-kristen@linux.intel.com> (raw)
In-Reply-To: <20221111183532.3676646-1-kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>

Replace functions sgx_mark_page_reclaimable() and
sgx_unmark_page_reclaimable() with sgx_record_epc_page() and
sgx_drop_epc_page(). sgx_record_epc_page() will add the epc_page
to the correct "reclaimable" or "unreclaimable" list in the
sgx_epc_lru struct. sgx_drop_epc_page() will delete the page
from the sgx_epc_lru list. Tracking pages that are not tracked by
the reclaimer in the LRU's "unreclaimable" list allows an OOM event
to cause all the pages in use by an enclave to be freed, regardless
of whether they were reclaimable pages or not.

Signed-off-by: Sean Christopherson <sean.j.christopherson-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Kristen Carlson Accardi <kristen-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Cc: Sean Christopherson <seanjc-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
 arch/x86/kernel/cpu/sgx/encl.c  | 10 +++++++---
 arch/x86/kernel/cpu/sgx/ioctl.c | 11 +++++++----
 arch/x86/kernel/cpu/sgx/main.c  | 26 +++++++++++++++-----------
 arch/x86/kernel/cpu/sgx/sgx.h   |  4 ++--
 arch/x86/kernel/cpu/sgx/virt.c  | 28 ++++++++++++++++++++--------
 5 files changed, 51 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 4eaf9d21e71b..4683da9ef4f1 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -252,6 +252,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
 		epc_page = sgx_encl_eldu(&encl->secs, NULL);
 		if (IS_ERR(epc_page))
 			return ERR_CAST(epc_page);
+		sgx_record_epc_page(epc_page, 0);
 	}
 
 	epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
@@ -259,7 +260,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
 		return ERR_CAST(epc_page);
 
 	encl->secs_child_cnt++;
-	sgx_mark_page_reclaimable(entry->epc_page);
+	sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 
 	return entry;
 }
@@ -375,7 +376,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	encl_page->type = SGX_PAGE_TYPE_REG;
 	encl->secs_child_cnt++;
 
-	sgx_mark_page_reclaimable(encl_page->epc_page);
+	sgx_record_epc_page(encl_page->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 
 	phys_addr = sgx_get_epc_phys_addr(epc_page);
 	/*
@@ -687,7 +688,7 @@ void sgx_encl_release(struct kref *ref)
 			 * The page and its radix tree entry cannot be freed
 			 * if the page is being held by the reclaimer.
 			 */
-			if (sgx_unmark_page_reclaimable(entry->epc_page))
+			if (sgx_drop_epc_page(entry->epc_page))
 				continue;
 
 			sgx_encl_free_epc_page(entry->epc_page);
@@ -703,6 +704,7 @@ void sgx_encl_release(struct kref *ref)
 	xa_destroy(&encl->page_array);
 
 	if (!encl->secs_child_cnt && encl->secs.epc_page) {
+		sgx_drop_epc_page(encl->secs.epc_page);
 		sgx_encl_free_epc_page(encl->secs.epc_page);
 		encl->secs.epc_page = NULL;
 	}
@@ -711,6 +713,7 @@ void sgx_encl_release(struct kref *ref)
 		va_page = list_first_entry(&encl->va_pages, struct sgx_va_page,
 					   list);
 		list_del(&va_page->list);
+		sgx_drop_epc_page(va_page->epc_page);
 		sgx_encl_free_epc_page(va_page->epc_page);
 		kfree(va_page);
 	}
@@ -1218,6 +1221,7 @@ struct sgx_epc_page *sgx_alloc_va_page(struct sgx_encl *encl, bool reclaim)
 		sgx_encl_free_epc_page(epc_page);
 		return ERR_PTR(-EFAULT);
 	}
+	sgx_record_epc_page(epc_page, 0);
 
 	return epc_page;
 }
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 9a1bb3c3211a..aca80a3f38a1 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -48,6 +48,7 @@ void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
 	encl->page_cnt--;
 
 	if (va_page) {
+		sgx_drop_epc_page(va_page->epc_page);
 		sgx_encl_free_epc_page(va_page->epc_page);
 		list_del(&va_page->list);
 		kfree(va_page);
@@ -113,6 +114,8 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
 	encl->attributes = secs->attributes;
 	encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
 
+	sgx_record_epc_page(encl->secs.epc_page, 0);
+
 	/* Set only after completion, as encl->lock has not been taken. */
 	set_bit(SGX_ENCL_CREATED, &encl->flags);
 
@@ -322,7 +325,7 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
 			goto err_out;
 	}
 
-	sgx_mark_page_reclaimable(encl_page->epc_page);
+	sgx_record_epc_page(encl_page->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 	mutex_unlock(&encl->lock);
 	mmap_read_unlock(current->mm);
 	return ret;
@@ -958,7 +961,7 @@ static long sgx_enclave_modify_types(struct sgx_encl *encl,
 			 * Prevent page from being reclaimed while mutex
 			 * is released.
 			 */
-			if (sgx_unmark_page_reclaimable(entry->epc_page)) {
+			if (sgx_drop_epc_page(entry->epc_page)) {
 				ret = -EAGAIN;
 				goto out_entry_changed;
 			}
@@ -973,7 +976,7 @@ static long sgx_enclave_modify_types(struct sgx_encl *encl,
 
 			mutex_lock(&encl->lock);
 
-			sgx_mark_page_reclaimable(entry->epc_page);
+			sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
 		}
 
 		/* Change EPC type */
@@ -1130,7 +1133,7 @@ static long sgx_encl_remove_pages(struct sgx_encl *encl,
 			goto out_unlock;
 		}
 
-		if (sgx_unmark_page_reclaimable(entry->epc_page)) {
+		if (sgx_drop_epc_page(entry->epc_page)) {
 			ret = -EBUSY;
 			goto out_unlock;
 		}
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index aa938e4d4a73..3b09433ffd85 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -262,7 +262,7 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
 			goto out;
 
 		sgx_encl_ewb(encl->secs.epc_page, &secs_backing);
-
+		sgx_drop_epc_page(encl->secs.epc_page);
 		sgx_encl_free_epc_page(encl->secs.epc_page);
 		encl->secs.epc_page = NULL;
 
@@ -499,31 +499,35 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
 }
 
 /**
- * sgx_mark_page_reclaimable() - Mark a page as reclaimable
+ * sgx_record_epc_page() - Add a page to the LRU tracking
  * @page:	EPC page
  *
- * Mark a page as reclaimable and add it to the active page list. Pages
- * are automatically removed from the active list when freed.
+ * Mark a page with the specified flags and add it to the appropriate
+ * (un)reclaimable list.
  */
-void sgx_mark_page_reclaimable(struct sgx_epc_page *page)
+void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
 {
 	spin_lock(&sgx_global_lru.lock);
-	page->flags |= SGX_EPC_PAGE_RECLAIMER_TRACKED;
-	sgx_epc_push_reclaimable(&sgx_global_lru, page);
+	WARN_ON(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
+	page->flags |= flags;
+	if (flags & SGX_EPC_PAGE_RECLAIMER_TRACKED)
+		sgx_epc_push_reclaimable(&sgx_global_lru, page);
+	else
+		sgx_epc_push_unreclaimable(&sgx_global_lru, page);
 	spin_unlock(&sgx_global_lru.lock);
 }
 
 /**
- * sgx_unmark_page_reclaimable() - Remove a page from the reclaim list
+ * sgx_drop_epc_page() - Remove a page from a LRU list
  * @page:	EPC page
  *
- * Clear the reclaimable flag and remove the page from the active page list.
+ * Clear the reclaimable flag if set and remove the page from its LRU.
  *
  * Return:
  *   0 on success,
  *   -EBUSY if the page is in the process of being reclaimed
  */
-int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
+int sgx_drop_epc_page(struct sgx_epc_page *page)
 {
 	spin_lock(&sgx_global_lru.lock);
 	if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
@@ -533,9 +537,9 @@ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page)
 			return -EBUSY;
 		}
 
-		list_del(&page->list);
 		page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
 	}
+	list_del(&page->list);
 	spin_unlock(&sgx_global_lru.lock);
 
 	return 0;
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index aac7d4feb0fa..969606615211 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -140,8 +140,8 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void);
 void sgx_free_epc_page(struct sgx_epc_page *page);
 
 void sgx_reclaim_direct(void);
-void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
-int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
+void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags);
+int sgx_drop_epc_page(struct sgx_epc_page *page);
 struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim);
 
 void sgx_ipi_cb(void *info);
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index 776ae5c1c032..0eabc4db91d0 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -64,6 +64,8 @@ static int __sgx_vepc_fault(struct sgx_vepc *vepc,
 		goto err_delete;
 	}
 
+	sgx_record_epc_page(epc_page, 0);
+
 	return 0;
 
 err_delete:
@@ -148,6 +150,7 @@ static int sgx_vepc_free_page(struct sgx_epc_page *epc_page)
 		return ret;
 	}
 
+	sgx_drop_epc_page(epc_page);
 	sgx_free_epc_page(epc_page);
 	return 0;
 }
@@ -220,8 +223,15 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
 		 * have been removed, the SECS page must have a child on
 		 * another instance.
 		 */
-		if (sgx_vepc_free_page(epc_page))
+		if (sgx_vepc_free_page(epc_page)) {
+			/*
+			 * Drop the page before adding it to the list of SECS
+			 * pages.  Moving the page off the unreclaimable list
+			 * needs to be done under the LRU's spinlock.
+			 */
+			sgx_drop_epc_page(epc_page);
 			list_add_tail(&epc_page->list, &secs_pages);
+		}
 
 		xa_erase(&vepc->page_array, index);
 	}
@@ -236,15 +246,17 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
 	mutex_lock(&zombie_secs_pages_lock);
 	list_for_each_entry_safe(epc_page, tmp, &zombie_secs_pages, list) {
 		/*
-		 * Speculatively remove the page from the list of zombies,
-		 * if the page is successfully EREMOVE'd it will be added to
-		 * the list of free pages.  If EREMOVE fails, throw the page
-		 * on the local list, which will be spliced on at the end.
+		 * If EREMOVE fails, throw the page on the local list, which
+		 * will be spliced on at the end.
+		 *
+		 * Note, this abuses sgx_drop_epc_page() to delete the page off
+		 * the list of zombies, but this is a very rare path (probably
+		 * never hit in production).  It's not worth special casing the
+		 * free path for this super rare case just to avoid taking the
+		 * LRU's spinlock.
 		 */
-		list_del(&epc_page->list);
-
 		if (sgx_vepc_free_page(epc_page))
-			list_add_tail(&epc_page->list, &secs_pages);
+			list_move_tail(&epc_page->list, &secs_pages);
 	}
 
 	if (!list_empty(&secs_pages))
-- 
2.37.3


  parent reply	other threads:[~2022-11-11 18:36 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-11 18:35 [PATCH 00/26] Add Cgroup support for SGX EPC memory Kristen Carlson Accardi
2022-11-11 18:35 ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 01/26] x86/sgx: Call cond_resched() at the end of sgx_reclaim_pages() Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-15 23:27   ` Jarkko Sakkinen
2022-11-15 23:27     ` Jarkko Sakkinen
2022-11-16  1:00     ` Reinette Chatre
2022-11-16  1:00       ` Reinette Chatre
2022-11-11 18:35 ` [PATCH 02/26] x86/sgx: Store struct sgx_encl when allocating new va pages Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-15 23:31   ` Jarkko Sakkinen
2022-11-15 23:31     ` Jarkko Sakkinen
2022-11-11 18:35 ` [PATCH 03/26] x86/sgx: Add 'struct sgx_epc_lru' to encapsulate lru list(s) Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-15 23:35   ` Jarkko Sakkinen
2022-11-15 23:35     ` Jarkko Sakkinen
2022-11-11 18:35 ` [PATCH 04/26] x86/sgx: Use sgx_epc_lru for existing active page list Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` Kristen Carlson Accardi [this message]
2022-11-11 18:35   ` [PATCH 05/26] x86/sgx: Track epc pages on reclaimable or unreclaimable lists Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 06/26] x86/sgx: Introduce RECLAIM_IN_PROGRESS flag for EPC pages Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-15 23:42   ` Jarkko Sakkinen
2022-11-15 23:42     ` Jarkko Sakkinen
2022-11-11 18:35 ` [PATCH 07/26] x86/sgx: Use a list to track to-be-reclaimed pages during reclaim Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 08/26] x86/sgx: Add EPC page flags to identify type of page Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 09/26] x86/sgx: Allow reclaiming up to 32 pages, but scan 16 by default Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 10/26] x86/sgx: Return the number of EPC pages that were successfully reclaimed Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 11/26] x86/sgx: Add option to ignore age of page during EPC reclaim Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 12/26] x86/sgx: Add helper to retrieve SGX EPC LRU given an EPC page Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 13/26] x86/sgx: Prepare for multiple LRUs Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 14/26] x86/sgx: Expose sgx_reclaim_pages() for use by EPC cgroup Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 15/26] x86/sgx: Add helper to grab pages from an arbitrary EPC LRU Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 16/26] x86/sgx: Add EPC OOM path to forcefully reclaim EPC Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 17/26] cgroup/misc: Add notifier block list support for css events Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-14 22:42   ` Tejun Heo
2022-11-14 22:42     ` Tejun Heo
2022-11-14 23:10     ` Kristen Carlson Accardi
2022-11-14 23:11       ` Tejun Heo
2022-11-14 23:11         ` Tejun Heo
2022-11-14 23:17         ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 18/26] cgroup/misc: Expose root_misc Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-14 22:19   ` Tejun Heo
2022-11-11 18:35 ` [PATCH 19/26] cgroup/misc: Expose parent_misc() Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-14 22:30   ` Tejun Heo
2022-11-14 22:30     ` Tejun Heo
2022-11-11 18:35 ` [PATCH 20/26] cgroup/misc: allow users of misc cgroup to read specific cgroup usage Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-14 22:31   ` Tejun Heo
2022-11-14 22:31     ` Tejun Heo
2022-11-11 18:35 ` [PATCH 21/26] cgroup/misc: allow misc cgroup consumers to read the max value Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-14 22:33   ` Tejun Heo
2022-11-14 22:33     ` Tejun Heo
2022-11-11 18:35 ` [PATCH 22/26] cgroup/misc: Add private per cgroup data to struct misc_cg Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-14 22:34   ` Tejun Heo
2022-11-14 22:34     ` Tejun Heo
2022-11-11 18:35 ` [PATCH 23/26] cgroup/misc: Add tryget functionality for misc controller Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 24/26] cgroup/misc: Add SGX EPC resource type Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-11 18:35 ` [PATCH 25/26] x86/sgx: Add support for misc cgroup controller Kristen Carlson Accardi
2022-11-11 18:35   ` Kristen Carlson Accardi
2022-11-14 22:38   ` Tejun Heo
2022-11-14 22:38     ` Tejun Heo
2022-11-11 18:35 ` [PATCH 26/26] Docs/x86/sgx: Add description for cgroup support Kristen Carlson Accardi
2022-11-12  9:28   ` Bagas Sanjaya

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221111183532.3676646-6-kristen@linux.intel.com \
    --to=kristen@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=cgroups@vger.kernel.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=dave.hansen@linux.kernel.org \
    --cc=hpa@zytor.com \
    --cc=jarkko@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-sgx@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=x86@kernel.org \
    --cc=zhiquan1.li@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.