KVM Archive on lore.kernel.org
 help / color / Atom feed
From: Sean Christopherson <sean.j.christopherson@intel.com>
To: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Thomas Gleixner" <tglx@linutronix.de>,
	"Ingo Molnar" <mingo@redhat.com>,
	"Borislav Petkov" <bp@alien8.de>,
	x86@kernel.org,
	"Jarkko Sakkinen" <jarkko.sakkinen@linux.intel.com>,
	"Sean Christopherson" <sean.j.christopherson@intel.com>,
	"Joerg Roedel" <joro@8bytes.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-sgx@vger.kernel.org, Andy Lutomirski <luto@amacapital.net>
Subject: [RFC PATCH 04/21] x86/sgx: Add /dev/sgx/virt_epc device to allocate "raw" EPC for VMs
Date: Fri, 26 Jul 2019 22:51:57 -0700
Message-ID: <20190727055214.9282-5-sean.j.christopherson@intel.com> (raw)
In-Reply-To: <20190727055214.9282-1-sean.j.christopherson@intel.com>

Add an SGX device to enable userspace to allocate EPC without an
associated enclave.  The intended and only known use case for direct EPC
allocation is to expose EPC to a KVM guest, hence the virt_epc moniker,
virt.{c,h} files and INTEL_SGX_VIRTUALIZATION Kconfig.

Although KVM is the end consumer of EPC, and will need hooks into the
virtual EPC management if oversubscription of EPC for guests is ever
supported (see below), implement direct access to EPC in the SGX
subsystem instead of in KVM.  Doing so has two major advantages:

  - Does not require changes to KVM's uAPI, e.g. EPC gets handled as
    just another memory backend for guests.

  - EPC management is wholly contained in the SGX subsystem, e.g. SGX
    does not have to export any symbols, changes to reclaim flows don't
    need to be routed through KVM, SGX's dirty laundry doesn't have to
    get aired out for the world to see, and so on and so forth.

Oversubscription of EPC for KVM guests is not currently supported.  Due
to the complications of handling reclaim conflicts between guest and
host, KVM EPC oversubscription is expected to be at least an order of
magnitude more complex than basic support for SGX virtualization.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/Kconfig                 |  10 ++
 arch/x86/kernel/cpu/sgx/Makefile |   1 +
 arch/x86/kernel/cpu/sgx/main.c   |   3 +
 arch/x86/kernel/cpu/sgx/sgx.h    |   3 +-
 arch/x86/kernel/cpu/sgx/virt.c   | 253 +++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/sgx/virt.h   |  14 ++
 6 files changed, 283 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/sgx/virt.c
 create mode 100644 arch/x86/kernel/cpu/sgx/virt.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 74ccb1bdea16..c1bdb9f85928 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1961,6 +1961,16 @@ config INTEL_SGX_DRIVER
 
 	  If unsure, say N.
 
+config INTEL_SGX_VIRTUALIZATION
+	bool "Intel SGX Virtualization"
+	depends on INTEL_SGX && KVM_INTEL
+	help
+	  Enabling support for SGX virtualization enables userspace to allocate
+	  "raw" EPC for the purpose of exposing EPC to a KVM guest, i.e. a
+	  virtual machine, via a device node (/dev/sgx/virt_epc by default).
+
+	  If unsure, say N.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
index e5d1e862969c..559fd0f9be50 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -1,2 +1,3 @@
 obj-y += encl.o encls.o main.o reclaim.o
 obj-$(CONFIG_INTEL_SGX_DRIVER) += driver/
+obj-$(CONFIG_INTEL_SGX_VIRTUALIZATION) += virt.o
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index 9f4473597620..ead827371139 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -14,6 +14,7 @@
 #include "arch.h"
 #include "encls.h"
 #include "sgx.h"
+#include "virt.h"
 
 struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
 int sgx_nr_epc_sections;
@@ -422,7 +423,9 @@ static __init int sgx_init(void)
 	if (ret)
 		goto err_provision_dev;
 
+	/* Success if the native *or* virtual driver initialized cleanly. */
 	ret = sgx_drv_init();
+	ret = sgx_virt_epc_init() ? ret : 0;
 	if (ret)
 		goto err_provision_cdev;
 
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index a0af8849c7c3..16cdb935aaa7 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -91,7 +91,8 @@ int sgx_einit(struct sgx_sigstruct *sigstruct, struct sgx_einittoken *token,
 
 #define SGX_ENCL_DEV_MINOR	0
 #define SGX_PROV_DEV_MINOR	1
-#define SGX_MAX_NR_DEVICES	2
+#define SGX_VIRT_DEV_MINOR	2
+#define SGX_MAX_NR_DEVICES	3
 
 __init int sgx_dev_init(const char *name, struct device *dev,
 			struct cdev *cdev, const struct file_operations *fops,
diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
new file mode 100644
index 000000000000..79ee5917a4fc
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/cdev.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <uapi/asm/sgx.h>
+
+#include "encls.h"
+#include "sgx.h"
+#include "virt.h"
+
+struct sgx_virt_epc_page {	/* per-index tracking entry stored in sgx_virt_epc.page_tree */
+	struct sgx_epc_page *epc_page;	/* backing EPC page; NULL until a fault maps one */
+};
+
+struct sgx_virt_epc {	/* per-open-file state, allocated in sgx_virt_epc_open() */
+	struct radix_tree_root page_tree;	/* page index -> struct sgx_virt_epc_page */
+	struct rw_semaphore lock;	/* write-held by the fault path, read-held by lookups */
+};
+
+/*
+ * Convert a user virtual address inside @vma into the page index used as the
+ * radix tree key: the VMA's page offset plus the number of whole pages
+ * between the start of the VMA and @addr.
+ */
+static inline unsigned long sgx_virt_epc_calc_index(struct vm_area_struct *vma,
+						    unsigned long addr)
+{
+	unsigned long vma_offset = addr - vma->vm_start;
+
+	return vma->vm_pgoff + PFN_DOWN(vma_offset);
+}
+
+/*
+ * Back the page at @addr in @vma with a newly allocated EPC page.
+ *
+ * The only caller in this file, sgx_virt_epc_fault(), invokes this with
+ * epc->lock held for write.
+ *
+ * The radix tree holds one tracking entry per page index.  An entry can exist
+ * without an EPC page if an earlier fault inserted it and then failed to
+ * allocate or map EPC; such an entry is reused rather than reallocated.
+ *
+ * Returns the tracking entry on success (including when the page was already
+ * backed), or an ERR_PTR() on failure: -ENOMEM, the radix_tree_insert() error,
+ * the sgx_alloc_page() error, or -EFAULT if the PFN could not be inserted.
+ */
+static struct sgx_virt_epc_page *__sgx_virt_epc_fault(struct sgx_virt_epc *epc,
+						      struct vm_area_struct *vma,
+						      unsigned long addr)
+{
+	struct sgx_virt_epc_page *page;
+	struct sgx_epc_page *epc_page;
+	unsigned long index;
+	vm_fault_t vmret;
+	int ret;
+
+	index = sgx_virt_epc_calc_index(vma, addr);
+
+	page = radix_tree_lookup(&epc->page_tree, index);
+	if (page) {
+		/* Already backed, nothing to do. */
+		if (page->epc_page)
+			return page;
+	} else {
+		page = kzalloc(sizeof(*page), GFP_KERNEL);
+		if (!page)
+			return ERR_PTR(-ENOMEM);
+
+		ret = radix_tree_insert(&epc->page_tree, index, page);
+		if (unlikely(ret)) {
+			kfree(page);
+			return ERR_PTR(ret);
+		}
+	}
+
+	/*
+	 * Pass the sgx_virt_epc instance itself as the first argument, not
+	 * the address of the on-stack @epc parameter, which dangles as soon
+	 * as this function returns.
+	 * NOTE(review): assumes the first argument is stored as the page's
+	 * owner cookie - confirm against sgx_alloc_page().
+	 */
+	epc_page = sgx_alloc_page(epc, false);
+	if (IS_ERR(epc_page))
+		return ERR_CAST(epc_page);
+
+	/* vmf_insert_pfn() returns a vm_fault_t, not an errno. */
+	vmret = vmf_insert_pfn(vma, addr, PFN_DOWN(epc_page->desc));
+	if (unlikely(vmret != VM_FAULT_NOPAGE)) {
+		sgx_free_page(epc_page);
+		return ERR_PTR(-EFAULT);
+	}
+
+	page->epc_page = epc_page;
+
+	return page;
+}
+
+/*
+ * Page fault handler for virtual EPC VMAs.  The actual work is done by
+ * __sgx_virt_epc_fault() under epc->lock held for write.
+ *
+ * Returns VM_FAULT_NOPAGE when the page was mapped or a signal is pending,
+ * VM_FAULT_RETRY (after dropping mmap_sem) when the helper reported -EBUSY
+ * and the fault may be retried, and VM_FAULT_SIGBUS otherwise.
+ */
+static vm_fault_t sgx_virt_epc_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct sgx_virt_epc *epc = vma->vm_private_data;
+	struct sgx_virt_epc_page *page;
+
+	down_write(&epc->lock);
+	page = __sgx_virt_epc_fault(epc, vma, vmf->address);
+	up_write(&epc->lock);
+
+	if (!IS_ERR(page))
+		return VM_FAULT_NOPAGE;
+
+	if (signal_pending(current))
+		return VM_FAULT_NOPAGE;
+
+	if (PTR_ERR(page) != -EBUSY || !(vmf->flags & FAULT_FLAG_ALLOW_RETRY))
+		return VM_FAULT_SIGBUS;
+
+	up_read(&vma->vm_mm->mmap_sem);
+	return VM_FAULT_RETRY;
+}
+
+/*
+ * Look up the tracking entry for @index under epc->lock held for read.
+ *
+ * Returns the entry if it exists and has an EPC page attached, otherwise
+ * ERR_PTR(-EFAULT).
+ */
+static struct sgx_virt_epc_page *sgx_virt_epc_get_page(struct sgx_virt_epc *epc,
+						       unsigned long index)
+{
+	struct sgx_virt_epc_page *entry;
+
+	down_read(&epc->lock);
+	entry = radix_tree_lookup(&epc->page_tree, index);
+	if (entry && entry->epc_page) {
+		up_read(&epc->lock);
+		return entry;
+	}
+	up_read(&epc->lock);
+
+	return ERR_PTR(-EFAULT);
+}
+
+static int sgx_virt_epc_access(struct vm_area_struct *vma, unsigned long start,
+			       void *buf, int len, int write)
+{
+	/*
+	 * vm_ops->access handler: copy @len bytes between @buf and the EPC
+	 * backing @vma starting at user address @start, reading from EPC when
+	 * @write is zero and writing to it otherwise.  Returns the number of
+	 * bytes processed, -EFAULT if a touched page has no EPC backing, or
+	 * -EIO if EDBGRD/EDBGWR reports an error.
+	 *
+	 * EDBG{RD,WR} are naturally sized, i.e. always 8-byte on 64-bit.
+	 */
+	unsigned char data[sizeof(unsigned long)];
+	struct sgx_virt_epc_page *page;
+	struct sgx_virt_epc *epc;
+	unsigned long addr, index;
+	int offset, cnt, i;
+	int ret = 0;
+	void *p;
+
+	epc = vma->vm_private_data;
+
+	for (i = 0; i < len && !ret; i += cnt) {
+		addr = start + i;
+		if (i == 0 || PFN_DOWN(addr) != PFN_DOWN(addr - cnt))	/* first chunk, or crossed into a new page */
+			index = sgx_virt_epc_calc_index(vma, addr);
+
+		page = sgx_virt_epc_get_page(epc, index);
+
+		/*
+		 * EDBG{RD,WR} require an active enclave, and given that VMM
+		 * EPC oversubscription isn't supported, a not-present EPC page
+		 * means the guest hasn't accessed the page and therefore can't
+		 * possibly have added the page to an enclave.
+		 */
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		offset = addr & (sizeof(unsigned long) - 1);	/* byte offset within the naturally aligned word */
+		addr = ALIGN_DOWN(addr, sizeof(unsigned long));
+		cnt = min((int)sizeof(unsigned long) - offset, len - i);	/* bytes of this word covered by the request */
+
+		p = sgx_epc_addr(page->epc_page) + (addr & ~PAGE_MASK);	/* kernel address of the word inside the EPC page */
+
+		/* EDBGRD for read, or to do RMW for a partial write. */
+		if (!write || cnt != sizeof(unsigned long))
+			ret = __edbgrd(p, (void *)data);
+
+		if (!ret) {
+			if (write) {
+				memcpy(data + offset, buf + i, cnt);	/* merge caller bytes into the word */
+				ret = __edbgwr(p, (void *)data);
+			} else {
+				memcpy(buf + i, data + offset, cnt);	/* hand back only the requested bytes */
+			}
+		}
+	}
+
+	if (ret)
+		return -EIO;
+	return i;
+}
+
+const struct vm_operations_struct sgx_virt_epc_vm_ops = {	/* installed on VMAs by sgx_virt_epc_mmap() */
+	.fault = sgx_virt_epc_fault,	/* allocates and maps an EPC page on fault */
+	.access = sgx_virt_epc_access,	/* EDBGRD/EDBGWR-based read/write of backed EPC */
+};
+
+/*
+ * mmap() handler for the virt_epc device.  Only shared mappings are accepted
+ * (-EINVAL otherwise).  The VMA is marked VM_PFNMAP | VM_IO | VM_DONTDUMP and
+ * wired to the file's sgx_virt_epc instance; pages are populated lazily by
+ * the fault handler in sgx_virt_epc_vm_ops.
+ */
+static int sgx_virt_epc_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	const unsigned long shared = vma->vm_flags & VM_SHARED;
+
+	if (!shared)
+		return -EINVAL;
+
+	vma->vm_private_data = file->private_data;
+	vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTDUMP;
+	vma->vm_ops = &sgx_virt_epc_vm_ops;
+
+	return 0;
+}
+
+/*
+ * Release handler for the virt_epc device file: free every EPC page and
+ * tracking entry in the radix tree, then the sgx_virt_epc instance itself.
+ * Always returns 0.
+ */
+static int sgx_virt_epc_release(struct inode *inode, struct file *file)
+{
+	struct sgx_virt_epc *epc = file->private_data;
+	struct radix_tree_iter iter;
+	struct sgx_virt_epc_page *page;
+	void **slot;
+
+	/*
+	 * First pass: free whatever can be freed right now.  Entries whose
+	 * EPC page could not be freed (__sgx_free_page() returned non-zero)
+	 * stay in the tree for the second pass; entries that never got an EPC
+	 * page are simply dropped.
+	 */
+	radix_tree_for_each_slot(slot, &epc->page_tree, &iter, 0) {
+		page = *slot;
+		if (page->epc_page && __sgx_free_page(page->epc_page))
+			continue;
+		kfree(page);
+		radix_tree_delete(&epc->page_tree, iter.index);
+	}
+
+	/*
+	 * Because we don't track which pages are SECS pages, it's possible
+	 * for EREMOVE to fail, e.g. a SECS page can have children if a VM
+	 * shuts down unexpectedly.  Retry all failed pages after iterating
+	 * through the entire tree, at which point all children should be
+	 * removed and the SECS pages can be nuked as well.
+	 */
+	radix_tree_for_each_slot(slot, &epc->page_tree, &iter, 0) {
+		page = *slot;
+		if (!(WARN_ON(!page->epc_page)))
+			sgx_free_page(page->epc_page);
+		/* Free the tracking entry too, otherwise it is leaked. */
+		kfree(page);
+		radix_tree_delete(&epc->page_tree, iter.index);
+	}
+
+	kfree(epc);
+
+	return 0;
+}
+
+/*
+ * Open handler for the virt_epc device: allocate and initialize the per-file
+ * sgx_virt_epc instance and stash it in file->private_data.  Returns 0 on
+ * success or -ENOMEM if the allocation fails.
+ */
+static int sgx_virt_epc_open(struct inode *inode, struct file *file)
+{
+	struct sgx_virt_epc *state = kzalloc(sizeof(*state), GFP_KERNEL);
+
+	if (!state)
+		return -ENOMEM;
+
+	INIT_RADIX_TREE(&state->page_tree, GFP_KERNEL);
+	init_rwsem(&state->lock);
+	file->private_data = state;
+
+	return 0;
+}
+
+static const struct file_operations sgx_virt_epc_fops = {	/* handed to sgx_dev_init() by sgx_virt_epc_init() */
+	.owner			= THIS_MODULE,
+	.open			= sgx_virt_epc_open,	/* allocates the per-file sgx_virt_epc */
+	.release		= sgx_virt_epc_release,	/* frees all EPC pages and the sgx_virt_epc */
+	.mmap			= sgx_virt_epc_mmap,	/* requires VM_SHARED; installs sgx_virt_epc_vm_ops */
+};
+
+/* Device and character device backing the "sgx/virt_epc" node. */
+static struct device sgx_virt_epc_dev;
+static struct cdev sgx_virt_epc_cdev;
+
+/*
+ * Initialize and register the virt_epc device.  If registration fails after
+ * sgx_dev_init() succeeded, the device reference is dropped via put_device().
+ * Returns 0 on success or the error from sgx_dev_init()/cdev_device_add().
+ */
+int __init sgx_virt_epc_init(void)
+{
+	int err;
+
+	err = sgx_dev_init("sgx/virt_epc", &sgx_virt_epc_dev,
+			   &sgx_virt_epc_cdev, &sgx_virt_epc_fops,
+			   SGX_VIRT_DEV_MINOR);
+	if (!err) {
+		err = cdev_device_add(&sgx_virt_epc_cdev, &sgx_virt_epc_dev);
+		if (err)
+			put_device(&sgx_virt_epc_dev);
+	}
+
+	return err;
+}
diff --git a/arch/x86/kernel/cpu/sgx/virt.h b/arch/x86/kernel/cpu/sgx/virt.h
new file mode 100644
index 000000000000..436170412b98
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/virt.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+#ifndef _ASM_X86_SGX_VIRT_H
+#define _ASM_X86_SGX_VIRT_H
+
+#ifdef CONFIG_INTEL_SGX_VIRTUALIZATION
+int __init sgx_virt_epc_init(void);	/* registers the sgx/virt_epc device; 0 on success */
+#else	/* !CONFIG_INTEL_SGX_VIRTUALIZATION */
+static inline int __init sgx_virt_epc_init(void)
+{
+	return -ENODEV;	/* stub: virtual EPC support is not built in */
+}
+#endif	/* CONFIG_INTEL_SGX_VIRTUALIZATION */
+
+#endif /* _ASM_X86_SGX_VIRT_H */
-- 
2.22.0


  parent reply index

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-27  5:51 [RFC PATCH 00/21] x86/sgx: KVM: Add SGX virtualization Sean Christopherson
2019-07-27  5:51 ` [RFC PATCH 01/21] x86/sgx: Add defines for SGX device minor numbers Sean Christopherson
2019-07-27  5:51 ` [RFC PATCH 02/21] x86/sgx: Move bus registration and device init to common code Sean Christopherson
2019-07-27  5:51 ` [RFC PATCH 03/21] x86/sgx: Move provisioning device " Sean Christopherson
2019-07-27  5:51 ` Sean Christopherson [this message]
2019-07-27 17:44   ` [RFC PATCH 04/21] x86/sgx: Add /dev/sgx/virt_epc device to allocate "raw" EPC for VMs Andy Lutomirski
2019-07-29 17:05     ` Sean Christopherson
2019-07-27  5:51 ` [RFC PATCH 05/21] x86/sgx: Expose SGX architectural definitions to the kernel Sean Christopherson
2019-07-27  5:51 ` [RFC PATCH 06/21] KVM: x86: Add SGX sub-features leaf to reverse CPUID table Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 07/21] KVM: x86: Add WARN_ON_ONCE(index!=0) in __do_cpuid_ent Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 08/21] KVM: x86: Add kvm_x86_ops hook to short circuit emulation Sean Christopherson
2019-07-27 17:38   ` Andy Lutomirski
2019-07-30  2:49     ` Sean Christopherson
2019-08-16  0:47       ` Andy Lutomirski
2019-08-19 22:01         ` Sean Christopherson
2019-08-20  1:34           ` Andy Lutomirski
2019-08-20  1:41             ` Sean Christopherson
2019-07-30  3:08   ` Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 09/21] KVM: VMX: Add basic handling of VM-Exit from SGX enclave Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 10/21] KVM: x86: Export kvm_mmu_gva_to_gpa_{read,write}() for VMX/SGX Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 11/21] KVM: x86: Export kvm_propagate_fault (as kvm_propagate_page_fault) Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 12/21] KVM: x86: Define new #PF SGX error code bit Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 13/21] x86/sgx: Move the intermediate EINIT helper into the driver Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 14/21] x86/sgx: Add helpers to expose ECREATE and EINIT to KVM Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 15/21] KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 16/21] KVM: VMX: Edd emulation of SGX Launch Control LE hash MSRs Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 17/21] KVM: VMX: Add handler for ENCLS[EINIT] to support SGX Launch Control Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 18/21] KVM: x86: Invoke kvm_x86_ops->cpuid_update() after kvm_update_cpuid() Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 19/21] KVM: VMX: Enable SGX virtualization for SGX1, SGX2 and LC Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 20/21] x86/sgx: Export sgx_set_attribute() for use by KVM Sean Christopherson
2019-07-27  5:52 ` [RFC PATCH 21/21] KVM: x86: Add capability to grant VM access to privileged SGX attribute Sean Christopherson
2019-07-27 17:32   ` Andy Lutomirski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190727055214.9282-5-sean.j.christopherson@intel.com \
    --to=sean.j.christopherson@intel.com \
    --cc=bp@alien8.de \
    --cc=hpa@zytor.com \
    --cc=jarkko.sakkinen@linux.intel.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-sgx@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

KVM Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/kvm/0 kvm/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 kvm kvm/ https://lore.kernel.org/kvm \
		kvm@vger.kernel.org kvm@archiver.kernel.org
	public-inbox-index kvm


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.kvm


AGPL code for this site: git clone https://public-inbox.org/ public-inbox