Virtualization Archive on lore.kernel.org
 help / color / Atom feed
From: "Adalbert Lazăr" <alazar@bitdefender.com>
To: kvm@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Sean Christopherson" <sean.j.christopherson@intel.com>,
	"Adalbert Lazăr" <alazar@bitdefender.com>
Subject: [RFC PATCH v1 22/34] KVM: VMX: Suppress EPT violation #VE by default (when enabled)
Date: Wed, 22 Jul 2020 19:01:09 +0300
Message-ID: <20200722160121.9601-23-alazar@bitdefender.com> (raw)
In-Reply-To: <20200722160121.9601-1-alazar@bitdefender.com>

From: Sean Christopherson <sean.j.christopherson@intel.com>

Unfortunately (for software), EPT violation #VEs are opt-out on a per
page basis, e.g. a not-present EPT violation on a zeroed EPT entry will
be morphed to a #VE due to the "suppress #VE" bit not being set.

When EPT violation #VEs are enabled, use a variation of clear_page()
that sets bit 63 (suppress #VE) in all 8-byte entries.  To wire up the
new behavior in the x86 MMU, add a new kvm_x86_ops hook and a new mask
to define a "shadow init value", which is needed to express the concept
that a cleared spte has a non-zero value when EPT violation #VEs are in
use.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Adalbert Lazăr <alazar@bitdefender.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/Makefile           |  2 +-
 arch/x86/kvm/mmu.h              |  1 +
 arch/x86/kvm/mmu/mmu.c          | 22 +++++++++++++++-------
 arch/x86/kvm/vmx/clear_page.S   | 17 +++++++++++++++++
 arch/x86/kvm/vmx/vmx.c          | 18 +++++++++++++++---
 6 files changed, 50 insertions(+), 11 deletions(-)
 create mode 100644 arch/x86/kvm/vmx/clear_page.S

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a9f225f9dd12..e89cea041ec9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1168,6 +1168,7 @@ struct kvm_x86_ops {
 	 * the implementation may choose to ignore if appropriate.
 	 */
 	void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
+	void (*clear_page)(void *page);
 
 	/*
 	 * Flush any TLB entries created by the guest.  Like tlb_flush_gva(),
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3cfe76299dee..b5972a3fdfee 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -19,7 +19,7 @@ kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
 			   hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o
 
-kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
+kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o vmx/clear_page.o
 kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
 
 obj-$(CONFIG_KVM)	+= kvm.o
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2692b14fb605..02fa0d30407f 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -52,6 +52,7 @@ static inline u64 rsvd_bits(int s, int e)
 }
 
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask);
+void kvm_mmu_set_spte_init_value(u64 init_value);
 
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 22c83192bba1..810e22f41306 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -253,6 +253,7 @@ static u64 __read_mostly shadow_mmio_value;
 static u64 __read_mostly shadow_mmio_access_mask;
 static u64 __read_mostly shadow_present_mask;
 static u64 __read_mostly shadow_me_mask;
+static u64 __read_mostly shadow_init_value;
 
 /*
  * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK;
@@ -542,6 +543,12 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
+void kvm_mmu_set_spte_init_value(u64 init_value)
+{
+	shadow_init_value = init_value;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_spte_init_value);
+
 static u8 kvm_get_shadow_phys_bits(void)
 {
 	/*
@@ -572,6 +579,7 @@ static void kvm_mmu_reset_all_pte_masks(void)
 	shadow_x_mask = 0;
 	shadow_present_mask = 0;
 	shadow_acc_track_mask = 0;
+	shadow_init_value = 0;
 
 	shadow_phys_bits = kvm_get_shadow_phys_bits();
 
@@ -612,7 +620,7 @@ static int is_nx(struct kvm_vcpu *vcpu)
 
 static int is_shadow_present_pte(u64 pte)
 {
-	return (pte != 0) && !is_mmio_spte(pte);
+	return (pte != 0) && pte != shadow_init_value && !is_mmio_spte(pte);
 }
 
 static int is_large_pte(u64 pte)
@@ -923,9 +931,9 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 	u64 old_spte = *sptep;
 
 	if (!spte_has_volatile_bits(old_spte))
-		__update_clear_spte_fast(sptep, 0ull);
+		__update_clear_spte_fast(sptep, shadow_init_value);
 	else
-		old_spte = __update_clear_spte_slow(sptep, 0ull);
+		old_spte = __update_clear_spte_slow(sptep, shadow_init_value);
 
 	if (!is_shadow_present_pte(old_spte))
 		return 0;
@@ -955,7 +963,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
  */
 static void mmu_spte_clear_no_track(u64 *sptep)
 {
-	__update_clear_spte_fast(sptep, 0ull);
+	__update_clear_spte_fast(sptep, shadow_init_value);
 }
 
 static u64 mmu_spte_get_lockless(u64 *sptep)
@@ -2660,7 +2668,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		if (level > PG_LEVEL_4K && need_sync)
 			flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
 	}
-	clear_page(sp->spt);
+	kvm_x86_ops.clear_page(sp->spt);
 	trace_kvm_mmu_get_page(sp, true);
 
 	kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
@@ -3637,7 +3645,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	struct kvm_shadow_walk_iterator iterator;
 	struct kvm_mmu_page *sp;
 	bool fault_handled = false;
-	u64 spte = 0ull;
+	u64 spte = shadow_init_value;
 	uint retry_count = 0;
 
 	if (!page_fault_can_be_fast(error_code))
@@ -4073,7 +4081,7 @@ static bool
 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	u64 sptes[PT64_ROOT_MAX_LEVEL], spte = 0ull;
+	u64 sptes[PT64_ROOT_MAX_LEVEL], spte = shadow_init_value;
 	struct rsvd_bits_validate *rsvd_check;
 	int root, leaf;
 	bool reserved = false;
diff --git a/arch/x86/kvm/vmx/clear_page.S b/arch/x86/kvm/vmx/clear_page.S
new file mode 100644
index 000000000000..89fcf5697391
--- /dev/null
+++ b/arch/x86/kvm/vmx/clear_page.S
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+/*
+ * "Clear" an EPT page when EPT violation #VEs are enabled, in which case the
+ * suppress #VE bit needs to be set for all unused entries.
+ *
+ * %rdi	- page
+ */
+#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
+
+SYM_FUNC_START(vmx_suppress_ve_clear_page)
+	movl $4096/8,%ecx
+	movabsq $0x8000000000000000,%rax
+	rep stosq
+	ret
+SYM_FUNC_END(vmx_suppress_ve_clear_page)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1c1dda14d18d..3428857c6157 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5639,14 +5639,24 @@ static void wakeup_handler(void)
 	spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
 }
 
+void vmx_suppress_ve_clear_page(void *page);
+
 static void vmx_enable_tdp(void)
 {
+	u64 p_mask = 0;
+
+	if (!cpu_has_vmx_ept_execute_only())
+		p_mask |= VMX_EPT_READABLE_MASK;
+	if (kvm_ve_supported) {
+		p_mask |= VMX_EPT_SUPPRESS_VE_BIT;
+		kvm_mmu_set_spte_init_value(VMX_EPT_SUPPRESS_VE_BIT);
+		kvm_x86_ops.clear_page = vmx_suppress_ve_clear_page;
+	}
+
 	kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
 		enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull,
 		enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull,
-		0ull, VMX_EPT_EXECUTABLE_MASK,
-		cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK,
-		VMX_EPT_RWX_MASK, 0ull);
+		0ull, VMX_EPT_EXECUTABLE_MASK, p_mask, VMX_EPT_RWX_MASK, 0ull);
 
 	ept_set_mmio_spte_mask();
 }
@@ -8238,6 +8248,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.tlb_flush_gva = vmx_flush_tlb_gva,
 	.tlb_flush_guest = vmx_flush_tlb_guest,
 
+	.clear_page = clear_page,
+
 	.run = vmx_vcpu_run,
 	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = vmx_skip_emulated_instruction,

  parent reply index

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-22 16:00 [RFC PATCH v1 00/34] VM introspection - EPT Views and Virtualization Exceptions Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 01/34] KVM: x86: export .get_vmfunc_status() Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 02/34] KVM: x86: export .get_eptp_switching_status() Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 03/34] KVM: x86: add kvm_get_ept_view() Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 04/34] KVM: x86: mmu: reindent to avoid lines longer than 80 chars Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 05/34] KVM: x86: mmu: add EPT view parameter to kvm_mmu_get_page() Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 06/34] KVM: x86: mmu: add support for EPT switching Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 07/34] KVM: x86: mmu: increase mmu_memory_cache size Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 08/34] KVM: x86: add .set_ept_view() Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 09/34] KVM: x86: add .control_ept_view() Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 10/34] KVM: x86: page track: allow page tracking for different EPT views Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 11/34] KVM: x86: mmu: allow zapping shadow pages for specific " Adalbert Lazăr
2020-07-22 16:00 ` [RFC PATCH v1 12/34] KVM: introspection: extend struct kvmi_features with the EPT views status support Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 13/34] KVM: introspection: add KVMI_VCPU_GET_EPT_VIEW Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 14/34] KVM: introspection: add 'view' field to struct kvmi_event_arch Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 15/34] KVM: introspection: add KVMI_VCPU_SET_EPT_VIEW Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 16/34] KVM: introspection: add KVMI_VCPU_CONTROL_EPT_VIEW Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 17/34] KVM: introspection: extend the access rights database with EPT view info Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 18/34] KVM: introspection: extend KVMI_VM_SET_PAGE_ACCESS " Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 19/34] KVM: introspection: clean non-default EPTs on unhook Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 20/34] KVM: x86: vmx: add support for virtualization exceptions Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 21/34] KVM: VMX: Define EPT suppress #VE bit (bit 63 in EPT leaf entries) Adalbert Lazăr
2020-07-22 16:01 ` Adalbert Lazăr [this message]
2020-07-22 16:01 ` [RFC PATCH v1 23/34] KVM: x86: mmu: fix: update present_mask in spte_read_protect() Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 24/34] KVM: vmx: trigger vm-exits for mmio sptes by default when #VE is enabled Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 25/34] KVM: x86: svm: set .clear_page() Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 26/34] KVM: x86: add .set_ve_info() Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 27/34] KVM: x86: add .disable_ve() Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 28/34] KVM: x86: page_track: add support for suppress #VE bit Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 29/34] KVM: vmx: make use of EPTP_INDEX in vmx_handle_exit() Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 30/34] KVM: vmx: make use of EPTP_INDEX in vmx_set_ept_view() Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 31/34] KVM: introspection: add #VE host capability checker Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 32/34] KVM: introspection: add KVMI_VCPU_SET_VE_INFO/KVMI_VCPU_DISABLE_VE Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 33/34] KVM: introspection: mask out non-rwx flags when reading/writing from/to the internal database Adalbert Lazăr
2020-07-22 16:01 ` [RFC PATCH v1 34/34] KVM: introspection: add KVMI_VM_SET_PAGE_SVE Adalbert Lazăr

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200722160121.9601-23-alazar@bitdefender.com \
    --to=alazar@bitdefender.com \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=sean.j.christopherson@intel.com \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Virtualization Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/virtualization/0 virtualization/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 virtualization virtualization/ https://lore.kernel.org/virtualization \
		virtualization@lists.linuxfoundation.org virtualization@lists.linux-foundation.org
	public-inbox-index virtualization

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.linuxfoundation.lists.virtualization


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git