* [RFC][PATCH] kvm: x86: fix stale mmio cache bug
From: David Matlack @ 2014-08-01 23:54 UTC
  To: Gleb Natapov, Paolo Bonzini, Xiao Guangrong, kvm, x86
  Cc: Eric Northup, David Matlack

The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
up to userspace:

(1) Guest accesses gpa X without a memory slot. The gfn is cached in
struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
the SPTE write-execute-noread so that future accesses cause
EPT_MISCONFIGs.

(2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
covering the page just accessed.

(3) Guest attempts to read or write to gpa X again. On Intel, this
generates an EPT_MISCONFIG. The memory slot generation number that
was incremented in (2) would normally take care of this, but we
fast-path mmio faults through quickly_check_mmio_pf(), which only
checks the per-vcpu mmio cache. Since we hit the cache, KVM passes a
KVM_EXIT_MMIO up to userspace.
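
For reference, the fast path in question looks roughly like this (a
simplified sketch of the helpers named above; exact details may
differ between kernel versions):

  /*
   * A fault is treated as mmio purely on a per-vcpu cache hit;
   * the memslot generation number is never consulted here.
   */
  static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr,
                                    bool direct)
  {
          if (direct)
                  /* compares addr >> PAGE_SHIFT against mmio_gfn */
                  return vcpu_match_mmio_gpa(vcpu, addr);

          /* compares addr & PAGE_MASK against mmio_gva */
          return vcpu_match_mmio_gva(vcpu, addr);
  }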

This patch fixes the issue by clearing the mmio cache in the
KVM_MR_CREATE code path:
 - introduce KVM_REQ_CLEAR_MMIO_CACHE for clearing all vcpu mmio
   caches.
 - extend vcpu_clear_mmio_info to clear mmio_gfn in addition to
   mmio_gva, since both can be used to fast path mmio faults.
 - issue KVM_REQ_CLEAR_MMIO_CACHE during memslot creation to flush
   the mmio cache.
 - in mmu_sync_roots, unconditionally clear the mmio cache since
   even direct_map (e.g. tdp) hosts use it.

Signed-off-by: David Matlack <dmatlack@google.com>
---
 arch/x86/kvm/mmu.c       |  3 ++-
 arch/x86/kvm/x86.c       |  5 +++++
 arch/x86/kvm/x86.h       |  8 +++++---
 include/linux/kvm_host.h |  2 ++
 virt/kvm/kvm_main.c      | 10 +++++-----
 5 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9314678..8d50b84 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3157,13 +3157,14 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
 	int i;
 	struct kvm_mmu_page *sp;
 
+	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
+
 	if (vcpu->arch.mmu.direct_map)
 		return;
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
 		return;
 
-	vcpu_clear_mmio_info(vcpu, ~0ul);
 	kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
 	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
 		hpa_t root = vcpu->arch.mmu.root_hpa;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ef432f8..05b5629 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6001,6 +6001,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_deliver_pmi(vcpu);
 		if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
 			vcpu_scan_ioapic(vcpu);
+
+		if (kvm_check_request(KVM_REQ_CLEAR_MMIO_CACHE, vcpu))
+			vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
@@ -7281,6 +7284,8 @@ void kvm_arch_memslots_updated(struct kvm *kvm)
 	 * mmio generation may have reached its maximum value.
 	 */
 	kvm_mmu_invalidate_mmio_sptes(kvm);
+
+	kvm_make_all_vcpus_request(kvm, KVM_REQ_CLEAR_MMIO_CACHE);
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8c97bac..41ef197 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -81,15 +81,17 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 }
 
 /*
- * Clear the mmio cache info for the given gva,
- * specially, if gva is ~0ul, we clear all mmio cache info.
+ * Clear the mmio cache info for the given gva. If gva is MMIO_GVA_ANY,
+ * unconditionally clear the mmio cache.
  */
+#define MMIO_GVA_ANY (~0ul)
 static inline void vcpu_clear_mmio_info(struct kvm_vcpu *vcpu, gva_t gva)
 {
-	if (gva != (~0ul) && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
+	if (gva != MMIO_GVA_ANY && vcpu->arch.mmio_gva != (gva & PAGE_MASK))
 		return;
 
 	vcpu->arch.mmio_gva = 0;
+	vcpu->arch.mmio_gfn = 0;
 }
 
 static inline bool vcpu_match_mmio_gva(struct kvm_vcpu *vcpu, unsigned long gva)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ec4e3bd..e4edaff 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -136,6 +136,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
 #define KVM_REQ_ENABLE_IBS        23
 #define KVM_REQ_DISABLE_IBS       24
+#define KVM_REQ_CLEAR_MMIO_CACHE  25
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
@@ -591,6 +592,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
+bool kvm_make_all_vcpus_request(struct kvm *kvm, unsigned int req);
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
 void kvm_make_mclock_inprogress_request(struct kvm *kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4b6c01b..d09527a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -152,7 +152,7 @@ static void ack_flush(void *_completed)
 {
 }
 
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_all_vcpus_request(struct kvm *kvm, unsigned int req)
 {
 	int i, cpu, me;
 	cpumask_var_t cpus;
@@ -189,7 +189,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	long dirty_count = kvm->tlbs_dirty;
 
 	smp_mb();
-	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+	if (kvm_make_all_vcpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
@@ -197,17 +197,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
 void kvm_reload_remote_mmus(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+	kvm_make_all_vcpus_request(kvm, KVM_REQ_MMU_RELOAD);
 }
 
 void kvm_make_mclock_inprogress_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+	kvm_make_all_vcpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
 }
 
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
 {
-	make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+	kvm_make_all_vcpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
 }
 
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
-- 
2.0.0.526.g5318336


* Re: [RFC][PATCH] kvm: x86: fix stale mmio cache bug
From: Xiao Guangrong @ 2014-08-02  4:15 UTC
  To: David Matlack; +Cc: Gleb Natapov, Paolo Bonzini, kvm, x86, Eric Northup


On Aug 2, 2014, at 7:54 AM, David Matlack <dmatlack@google.com> wrote:

> The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
> up to userspace:
> 
> (1) Guest accesses gpa X without a memory slot. The gfn is cached in
> struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
> the SPTE write-execute-noread so that future accesses cause
> EPT_MISCONFIGs.
> 
> (2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
> covering the page just accessed.
> 
> (3) Guest attempts to read or write to gpa X again. On Intel, this
> generates an EPT_MISCONFIG. The memory slot generation number that
> was incremented in (2) would normally take care of this, but we
> fast-path mmio faults through quickly_check_mmio_pf(), which only
> checks the per-vcpu mmio cache. Since we hit the cache, KVM passes a
> KVM_EXIT_MMIO up to userspace.
> 

Good catch, thank you, David!

> This patch fixes the issue by clearing the mmio cache in the
> KVM_MR_CREATE code path:
> - introduce KVM_REQ_CLEAR_MMIO_CACHE for clearing all vcpu mmio
>  caches.
> - extend vcpu_clear_mmio_info to clear mmio_gfn in addition to
>  mmio_gva, since both can be used to fast path mmio faults.
> - issue KVM_REQ_CLEAR_MMIO_CACHE during memslot creation to flush
>  the mmio cache.
> - in mmu_sync_roots, unconditionally clear the mmio cache since
>  even direct_map (e.g. tdp) hosts use it.

I would prefer to also cache the spte’s generation number, then check
that number in quickly_check_mmio_pf().
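
Something along these lines, say — a sketch only; the mmio_gen field
and where it lives are illustrative, not part of this patch:

  static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
                                          gva_t gva, gfn_t gfn,
                                          unsigned access)
  {
          vcpu->arch.mmio_gva = gva & PAGE_MASK;
          vcpu->arch.access = access;
          vcpu->arch.mmio_gfn = gfn;
          /* hypothetical field: generation this entry is valid for */
          vcpu->arch.mmio_gen = kvm_memslots(vcpu->kvm)->generation;
  }

  static inline bool vcpu_match_mmio_gen(struct kvm_vcpu *vcpu)
  {
          return vcpu->arch.mmio_gen ==
                 kvm_memslots(vcpu->kvm)->generation;
  }

quickly_check_mmio_pf() would then only report a hit when
vcpu_match_mmio_gen() also holds, so creating or moving a memslot
invalidates every vcpu's cached entry without any explicit flush.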


* Re: [RFC][PATCH] kvm: x86: fix stale mmio cache bug
From: Paolo Bonzini @ 2014-08-04 12:44 UTC
  To: Xiao Guangrong, David Matlack; +Cc: Gleb Natapov, kvm, x86, Eric Northup

On 02/08/2014 06:15, Xiao Guangrong wrote:
> I would prefer to also cache the spte’s generation number, then check
> that number in quickly_check_mmio_pf().

I agree, thanks Xiao for the review and David for the report!

Paolo

* Re: [RFC][PATCH] kvm: x86: fix stale mmio cache bug
From: David Matlack @ 2014-08-04 16:08 UTC
  To: Paolo Bonzini; +Cc: Xiao Guangrong, Gleb Natapov, kvm, x86, Eric Northup

On Mon, Aug 4, 2014 at 5:44 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
> On 02/08/2014 06:15, Xiao Guangrong wrote:
>> I would prefer to also cache the spte’s generation number, then check
>> that number in quickly_check_mmio_pf().
>
> I agree, thanks Xiao for the review and David for the report!

I like this approach as well. I'll send out a v2.

Thanks for the reviews!
