linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junaid Shahid <junaids@google.com>
To: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org, pbonzini@redhat.com, jmattson@google.com,
	pjt@google.com, oweisse@google.com, alexandre.chartre@oracle.com,
	rppt@linux.ibm.com, dave.hansen@linux.intel.com,
	peterz@infradead.org, tglx@linutronix.de, luto@kernel.org,
	linux-mm@kvack.org
Subject: [RFC PATCH 15/47] kvm: asi: Restricted address space for VM execution
Date: Tue, 22 Feb 2022 21:21:51 -0800	[thread overview]
Message-ID: <20220223052223.1202152-16-junaids@google.com> (raw)
In-Reply-To: <20220223052223.1202152-1-junaids@google.com>

An ASI restricted address space is added for KVM. It is currently only
enabled for Intel CPUs. The ASI hooks have been setup to do an L1D
cache flush and MDS clear when entering the restricted address space.

The hooks are also meant to stun and unstun the sibling hyperthread
when exiting and entering the restricted address space. Internally,
we do have a full stunning implementation available, but it hasn't
yet been determined whether it is fully compatible with the upstream
core scheduling implementation, so it is not included in this patch
series and instead this patch just includes corresponding stub
functions to demonstrate where the stun/unstun would happen.

Signed-off-by: Junaid Shahid <junaids@google.com>


---
 arch/x86/include/asm/kvm_host.h |  2 +
 arch/x86/kvm/vmx/vmx.c          | 41 ++++++++++++-----
 arch/x86/kvm/x86.c              | 81 ++++++++++++++++++++++++++++++++-
 include/linux/kvm_host.h        |  2 +
 4 files changed, 113 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 555f4de47ef2..98cbd6447e3e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1494,6 +1494,8 @@ struct kvm_x86_ops {
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
 	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+
+	void (*flush_sensitive_cpu_state)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0dbf94eb954f..e0178b57be75 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -47,6 +47,7 @@
 #include <asm/spec-ctrl.h>
 #include <asm/virtext.h>
 #include <asm/vmx.h>
+#include <asm/asi.h>
 
 #include "capabilities.h"
 #include "cpuid.h"
@@ -300,7 +301,7 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
 	else
 		static_branch_disable(&vmx_l1d_should_flush);
 
-	if (l1tf == VMENTER_L1D_FLUSH_COND)
+	if (l1tf == VMENTER_L1D_FLUSH_COND && !boot_cpu_has(X86_FEATURE_ASI))
 		static_branch_enable(&vmx_l1d_flush_cond);
 	else
 		static_branch_disable(&vmx_l1d_flush_cond);
@@ -6079,6 +6080,8 @@ static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
 	if (static_branch_likely(&vmx_l1d_flush_cond)) {
 		bool flush_l1d;
 
+		VM_BUG_ON(vcpu->kvm->asi);
+
 		/*
 		 * Clear the per-vcpu flush bit, it gets set again
 		 * either from vcpu_run() or from one of the unsafe
@@ -6590,16 +6593,31 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 	}
 }
 
-static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
-					struct vcpu_vmx *vmx)
+static void vmx_flush_sensitive_cpu_state(struct kvm_vcpu *vcpu)
 {
-	kvm_guest_enter_irqoff();
-
 	/* L1D Flush includes CPU buffer clear to mitigate MDS */
 	if (static_branch_unlikely(&vmx_l1d_should_flush))
 		vmx_l1d_flush(vcpu);
 	else if (static_branch_unlikely(&mds_user_clear))
 		mds_clear_cpu_buffers();
+}
+
+static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
+					struct vcpu_vmx *vmx)
+{
+	unsigned long cr3;
+
+	kvm_guest_enter_irqoff();
+
+	vmx_flush_sensitive_cpu_state(vcpu);
+
+	asi_enter(vcpu->kvm->asi);
+
+	cr3 = __get_current_cr3_fast();
+	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
+		vmcs_writel(HOST_CR3, cr3);
+		vmx->loaded_vmcs->host_state.cr3 = cr3;
+	}
 
 	if (vcpu->arch.cr2 != native_read_cr2())
 		native_write_cr2(vcpu->arch.cr2);
@@ -6609,13 +6627,16 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 
 	vcpu->arch.cr2 = native_read_cr2();
 
+	VM_WARN_ON_ONCE(vcpu->kvm->asi && !is_asi_active());
+	asi_set_target_unrestricted();
+
 	kvm_guest_exit_irqoff();
 }
 
 static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long cr3, cr4;
+	unsigned long cr4;
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!enable_vnmi &&
@@ -6657,12 +6678,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (kvm_register_is_dirty(vcpu, VCPU_REGS_RIP))
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
-	cr3 = __get_current_cr3_fast();
-	if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
-		vmcs_writel(HOST_CR3, cr3);
-		vmx->loaded_vmcs->host_state.cr3 = cr3;
-	}
-
 	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
@@ -7691,6 +7706,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.complete_emulated_msr = kvm_complete_insn_gp,
 
 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
+
+	.flush_sensitive_cpu_state = vmx_flush_sensitive_cpu_state,
 };
 
 static __init void vmx_setup_user_return_msrs(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e50e97ac4408..dd07f677d084 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -81,6 +81,7 @@
 #include <asm/emulate_prefix.h>
 #include <asm/sgx.h>
 #include <clocksource/hyperv_timer.h>
+#include <asm/asi.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -297,6 +298,8 @@ EXPORT_SYMBOL_GPL(supported_xcr0);
 
 static struct kmem_cache *x86_emulator_cache;
 
+static int __read_mostly kvm_asi_index;
+
 /*
  * When called, it means the previous get/set msr reached an invalid msr.
  * Return true if we want to ignore/silent this failed msr access.
@@ -8620,6 +8623,50 @@ static struct notifier_block pvclock_gtod_notifier = {
 };
 #endif
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+
+/*
+ * We have an HT-stunning implementation available internally,
+ * but it is yet to be determined if it is fully compatible with the
+ * upstream core scheduling implementation. So leaving it out for now
+ * and just leaving these stubs here.
+ */
+static void stun_sibling(void) { }
+static void unstun_sibling(void) { }
+
+/*
+ * This function must be fully re-entrant and idempotent.
+ * Though the idempotency requirement could potentially be relaxed for stuff
+ * like stats where complete accuracy is not needed.
+ */
+static void kvm_pre_asi_exit(void)
+{
+	stun_sibling();
+}
+
+/*
+ * This function must be fully re-entrant and idempotent.
+ * Though the idempotency requirement could potentially be relaxed for stuff
+ * like stats where complete accuracy is not needed.
+ */
+static void kvm_post_asi_enter(void)
+{
+	struct kvm_vcpu *vcpu = raw_cpu_read(*kvm_get_running_vcpus());
+
+	kvm_x86_ops.flush_sensitive_cpu_state(vcpu);
+
+	unstun_sibling();
+}
+
+#endif
+
+static const struct asi_hooks kvm_asi_hooks = {
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	.pre_asi_exit = kvm_pre_asi_exit,
+	.post_asi_enter = kvm_post_asi_enter
+#endif
+};
+
 int kvm_arch_init(void *opaque)
 {
 	struct kvm_x86_init_ops *ops = opaque;
@@ -8674,6 +8721,15 @@ int kvm_arch_init(void *opaque)
 	if (r)
 		goto out_free_percpu;
 
+	if (ops->runtime_ops->flush_sensitive_cpu_state) {
+		r = asi_register_class("KVM", ASI_MAP_STANDARD_NONSENSITIVE,
+				       &kvm_asi_hooks);
+		if (r < 0)
+			goto out_mmu_exit;
+
+		kvm_asi_index = r;
+	}
+
 	kvm_timer_init();
 
 	perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@ -8694,6 +8750,8 @@ int kvm_arch_init(void *opaque)
 
 	return 0;
 
+out_mmu_exit:
+	kvm_mmu_module_exit();
 out_free_percpu:
 	free_percpu(user_return_msrs);
 out_free_x86_emulator_cache:
@@ -8720,6 +8778,11 @@ void kvm_arch_exit(void)
 	irq_work_sync(&pvclock_irq_work);
 	cancel_work_sync(&pvclock_gtod_work);
 #endif
+	if (kvm_asi_index > 0) {
+		asi_unregister_class(kvm_asi_index);
+		kvm_asi_index = 0;
+	}
+
 	kvm_x86_ops.hardware_enable = NULL;
 	kvm_mmu_module_exit();
 	free_percpu(user_return_msrs);
@@ -11391,11 +11454,26 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
 
 	kvm_apicv_init(kvm);
+
+	if (kvm_asi_index > 0) {
+		ret = asi_init(kvm->mm, kvm_asi_index, &kvm->asi);
+		if (ret)
+			goto error;
+	}
+
 	kvm_hv_init_vm(kvm);
 	kvm_mmu_init_vm(kvm);
 	kvm_xen_init_vm(kvm);
 
-	return static_call(kvm_x86_vm_init)(kvm);
+	ret = static_call(kvm_x86_vm_init)(kvm);
+	if (ret)
+		goto error;
+
+	return 0;
+error:
+	kvm_page_track_cleanup(kvm);
+	asi_destroy(kvm->asi);
+	return ret;
 }
 
 int kvm_arch_post_init_vm(struct kvm *kvm)
@@ -11549,6 +11627,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_page_track_cleanup(kvm);
 	kvm_xen_destroy_vm(kvm);
 	kvm_hv_destroy_vm(kvm);
+	asi_destroy(kvm->asi);
 }
 
 static void memslot_rmap_free(struct kvm_memory_slot *slot)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c310648cc8f1..9dd63ed21f75 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -38,6 +38,7 @@
 
 #include <asm/kvm_host.h>
 #include <linux/kvm_dirty_ring.h>
+#include <asm/asi.h>
 
 #ifndef KVM_MAX_VCPU_IDS
 #define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS
@@ -551,6 +552,7 @@ struct kvm {
 	 */
 	struct mutex slots_arch_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
+	struct asi *asi;
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 
-- 
2.35.1.473.g83b2b277ed-goog


  parent reply	other threads:[~2022-02-23  5:25 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-23  5:21 [RFC PATCH 00/47] Address Space Isolation for KVM Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 01/47] mm: asi: Introduce ASI core API Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 02/47] mm: asi: Add command-line parameter to enable/disable ASI Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 03/47] mm: asi: Switch to unrestricted address space when entering scheduler Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 04/47] mm: asi: ASI support in interrupts/exceptions Junaid Shahid
2022-03-14 15:50   ` Thomas Gleixner
2022-03-15  2:01     ` Junaid Shahid
2022-03-15 12:55       ` Thomas Gleixner
2022-03-15 22:41         ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 05/47] mm: asi: Make __get_current_cr3_fast() ASI-aware Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 06/47] mm: asi: ASI page table allocation and free functions Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 07/47] mm: asi: Functions to map/unmap a memory range into ASI page tables Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 08/47] mm: asi: Add basic infrastructure for global non-sensitive mappings Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 09/47] mm: Add __PAGEFLAG_FALSE Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 10/47] mm: asi: Support for global non-sensitive direct map allocations Junaid Shahid
2022-03-23 21:06   ` Matthew Wilcox
2022-03-23 23:48     ` Junaid Shahid
2022-03-24  1:54       ` Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 11/47] mm: asi: Global non-sensitive vmalloc/vmap support Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 12/47] mm: asi: Support for global non-sensitive slab caches Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 13/47] asi: Added ASI memory cgroup flag Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 14/47] mm: asi: Disable ASI API when ASI is not enabled for a process Junaid Shahid
2022-02-23  5:21 ` Junaid Shahid [this message]
2022-02-23  5:21 ` [RFC PATCH 16/47] mm: asi: Support for mapping non-sensitive pcpu chunks Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 17/47] mm: asi: Aliased direct map for local non-sensitive allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 18/47] mm: asi: Support for pre-ASI-init " Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 19/47] mm: asi: Support for locally nonsensitive page allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 20/47] mm: asi: Support for locally non-sensitive vmalloc allocations Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 21/47] mm: asi: Add support for locally non-sensitive VM_USERMAP pages Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 22/47] mm: asi: Added refcounting when initilizing an asi Junaid Shahid
2022-02-23  5:21 ` [RFC PATCH 23/47] mm: asi: Add support for mapping all userspace memory into ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 24/47] mm: asi: Support for local non-sensitive slab caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 25/47] mm: asi: Avoid warning from NMI userspace accesses in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 26/47] mm: asi: Use separate PCIDs for restricted address spaces Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 27/47] mm: asi: Avoid TLB flushes during ASI CR3 switches when possible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 28/47] mm: asi: Avoid TLB flush IPIs to CPUs not in ASI context Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 29/47] mm: asi: Reduce TLB flushes when freeing pages asynchronously Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 30/47] mm: asi: Add API for mapping userspace address ranges Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 31/47] mm: asi: Support for non-sensitive SLUB caches Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 32/47] x86: asi: Allocate FPU state separately when ASI is enabled Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 33/47] kvm: asi: Map guest memory into restricted ASI address space Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 34/47] kvm: asi: Unmap guest memory from ASI address space when using nested virt Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 35/47] mm: asi: asi_exit() on PF, skip handling if address is accessible Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 36/47] mm: asi: Adding support for dynamic percpu ASI allocations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 37/47] mm: asi: ASI annotation support for static variables Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 38/47] mm: asi: ASI annotation support for dynamic modules Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 39/47] mm: asi: Skip conventional L1TF/MDS mitigations Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 40/47] mm: asi: support for static percpu DEFINE_PER_CPU*_ASI Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 41/47] mm: asi: Annotation of static variables to be nonsensitive Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 42/47] mm: asi: Annotation of PERCPU " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 43/47] mm: asi: Annotation of dynamic " Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 44/47] kvm: asi: Splitting kvm_vcpu_arch into non/sensitive parts Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 45/47] mm: asi: Mapping global nonsensitive areas in asi_global_init Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 46/47] kvm: asi: Do asi_exit() in vcpu_run loop before returning to userspace Junaid Shahid
2022-02-23  5:22 ` [RFC PATCH 47/47] mm: asi: Properly un/mapping task stack from ASI + tlb flush Junaid Shahid
2022-03-05  3:39 ` [RFC PATCH 00/47] Address Space Isolation for KVM Hyeonggon Yoo
2022-03-16 21:34 ` Alexandre Chartre
2022-03-17 23:25   ` Junaid Shahid
2022-03-22  9:46     ` Alexandre Chartre
2022-03-23 19:35       ` Junaid Shahid
2022-04-08  8:52         ` Alexandre Chartre
2022-04-11  3:26           ` junaid_shahid
2022-03-16 22:49 ` Thomas Gleixner
2022-03-17 21:24   ` Junaid Shahid

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220223052223.1202152-16-junaids@google.com \
    --to=junaids@google.com \
    --cc=alexandre.chartre@oracle.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=oweisse@google.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjt@google.com \
    --cc=rppt@linux.ibm.com \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).