[PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set

* [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
@ 2018-02-16 10:22 Razvan Cojocaru
  2018-02-16 11:17 ` Jan Beulich
  2018-02-23  4:53 ` Tian, Kevin
  0 siblings, 2 replies; 13+ messages in thread
From: Razvan Cojocaru @ 2018-02-16 10:22 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, tamas, suravee.suthikulpanit, Razvan Cojocaru,
	george.dunlap, andrew.cooper3, tim, jbeulich, jun.nakajima,
	boris.ostrovsky

The emulation layers of Xen lack PCID support, and as we only offer
PCID to HAP guests, all writes to CR3 are handled by hardware,
except when introspection is involved. Consequently, trying to set
CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
crashes. The workaround is to clear the noflush bit in
hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
Additionally, a bool parameter now propagates to
{svm,vmx}_update_guest_cr(), so that no flushes occur when
the bit was set.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Reported-by: Bitweasil <bitweasil@cryptohaze.com>
Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tamas K Lengyel <tamas@tklengyel.com>

---
Changes since V4:
 - Changed "else { if !(flags & HVM_UPDATE_GUEST_CR3_NO_FLUSH) ) ... }"
   to "else if ...".
 - Renamed HVM_UPDATE_GUEST_CR3_NO_FLUSH to
   HVM_UPDATE_GUEST_CR3_NOFLUSH.
 - Defined X86_CR3_NOFLUSH as (_AC(1, ULL) << 63).
---
 xen/arch/x86/hvm/hvm.c            | 13 ++++++++++---
 xen/arch/x86/hvm/monitor.c        |  3 +++
 xen/arch/x86/hvm/svm/svm.c        | 15 +++++++++------
 xen/arch/x86/hvm/vmx/vmx.c        | 18 +++++++++++-------
 xen/arch/x86/mm.c                 |  2 +-
 xen/arch/x86/mm/hap/hap.c         |  6 +++---
 xen/arch/x86/mm/shadow/common.c   |  2 +-
 xen/arch/x86/mm/shadow/multi.c    |  6 +++---
 xen/arch/x86/mm/shadow/none.c     |  2 +-
 xen/include/asm-x86/hvm/hvm.h     | 15 +++++++++++++--
 xen/include/asm-x86/hvm/svm/svm.h |  2 +-
 xen/include/asm-x86/paging.h      |  7 ++++---
 xen/include/asm-x86/x86-defns.h   |  5 +++++
 13 files changed, 65 insertions(+), 31 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 91bc3e8..8047d74 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2297,6 +2297,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
     struct vcpu *v = current;
     struct page_info *page;
     unsigned long old = v->arch.hvm_vcpu.guest_cr[3];
+    bool noflush = false;
 
     if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled &
                                monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) )
@@ -2313,6 +2314,12 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
         }
     }
 
+    if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. */
+    {
+        noflush = value & X86_CR3_NOFLUSH;
+        value &= ~X86_CR3_NOFLUSH;
+    }
+
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
          (value != v->arch.hvm_vcpu.guest_cr[3]) )
     {
@@ -2330,7 +2337,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
     }
 
     v->arch.hvm_vcpu.guest_cr[3] = value;
-    paging_update_cr3(v);
+    paging_update_cr3(v, noflush);
     return X86EMUL_OKAY;
 
  bad_cr3:
@@ -4031,7 +4038,7 @@ static int hvmop_flush_tlb_all(void)
 
     /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
     for_each_vcpu ( d, v )
-        paging_update_cr3(v);
+        paging_update_cr3(v, false);
 
     /* Flush all dirty TLBs. */
     flush_tlb_mask(d->dirty_cpumask);
@@ -4193,7 +4200,7 @@ static int hvmop_set_param(
         domain_pause(d);
         d->arch.hvm_domain.params[a.index] = a.value;
         for_each_vcpu ( d, v )
-            paging_update_cr3(v);
+            paging_update_cr3(v, false);
         domain_unpause(d);
 
         domctl_lock_release();
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index 5d568a3..9869f07 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -36,6 +36,9 @@ bool hvm_monitor_cr(unsigned int index, unsigned long value, unsigned long old)
     struct arch_domain *ad = &curr->domain->arch;
     unsigned int ctrlreg_bitmask = monitor_ctrlreg_bitmask(index);
 
+    if ( index == VM_EVENT_X86_CR3 && hvm_pcid_enabled(curr) )
+        value &= ~X86_CR3_NOFLUSH; /* Clear the noflush bit. */
+
     if ( (ad->monitor.write_ctrlreg_enabled & ctrlreg_bitmask) &&
          (!(ad->monitor.write_ctrlreg_onchangeonly & ctrlreg_bitmask) ||
           value != old) &&
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 9f58afc..2c62d5e 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -325,9 +325,9 @@ static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
     v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
     v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
     v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
-    svm_update_guest_cr(v, 0);
-    svm_update_guest_cr(v, 2);
-    svm_update_guest_cr(v, 4);
+    svm_update_guest_cr(v, 0, 0);
+    svm_update_guest_cr(v, 2, 0);
+    svm_update_guest_cr(v, 4, 0);
 
     /* Load sysenter MSRs into both VMCB save area and VCPU fields. */
     vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs;
@@ -543,7 +543,7 @@ static int svm_guest_x86_mode(struct vcpu *v)
     return likely(vmcb->cs.db) ? 4 : 2;
 }
 
-void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
+void svm_update_guest_cr(struct vcpu *v, unsigned int cr, unsigned int flags)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     uint64_t value;
@@ -583,10 +583,13 @@ void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
     case 3:
         vmcb_set_cr3(vmcb, v->arch.hvm_vcpu.hw_cr[3]);
         if ( !nestedhvm_enabled(v->domain) )
-            hvm_asid_flush_vcpu(v);
+        {
+            if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
+                hvm_asid_flush_vcpu(v);
+        }
         else if ( nestedhvm_vmswitch_in_progress(v) )
             ; /* CR3 switches during VMRUN/VMEXIT do not flush the TLB. */
-        else
+        else if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
             hvm_asid_flush_vcpu_asid(
                 nestedhvm_vcpu_in_guestmode(v)
                 ? &vcpu_nestedhvm(v).nv_n2asid : &v->arch.hvm_vcpu.n1asid);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 5cd689e..e9ad04d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -68,7 +68,8 @@ static void vmx_ctxt_switch_to(struct vcpu *v);
 static int  vmx_alloc_vlapic_mapping(struct domain *d);
 static void vmx_free_vlapic_mapping(struct domain *d);
 static void vmx_install_vlapic_mapping(struct vcpu *v);
-static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
+                                unsigned int flags);
 static void vmx_update_guest_efer(struct vcpu *v);
 static void vmx_wbinvd_intercept(void);
 static void vmx_fpu_dirty_intercept(void);
@@ -836,9 +837,9 @@ static int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 
     v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
     v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
-    vmx_update_guest_cr(v, 0);
-    vmx_update_guest_cr(v, 2);
-    vmx_update_guest_cr(v, 4);
+    vmx_update_guest_cr(v, 0, 0);
+    vmx_update_guest_cr(v, 2, 0);
+    vmx_update_guest_cr(v, 4, 0);
 
     v->arch.hvm_vcpu.guest_efer = c->msr_efer;
     vmx_update_guest_efer(v);
@@ -1548,7 +1549,8 @@ void vmx_update_debug_state(struct vcpu *v)
     vmx_vmcs_exit(v);
 }
 
-static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
+                                unsigned int flags)
 {
     vmx_vmcs_enter(v);
 
@@ -1700,7 +1702,9 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
         }
 
         __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
-        hvm_asid_flush_vcpu(v);
+
+        if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
+            hvm_asid_flush_vcpu(v);
         break;
 
     default:
@@ -2652,7 +2656,7 @@ static int vmx_cr_access(unsigned long exit_qualification)
          */
         hvm_monitor_crX(CR0, value, old);
         curr->arch.hvm_vcpu.guest_cr[0] = value;
-        vmx_update_guest_cr(curr, 0);
+        vmx_update_guest_cr(curr, 0, 0);
         HVMTRACE_0D(CLTS);
         break;
     }
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e1f089b..9d26a9d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -526,7 +526,7 @@ void update_cr3(struct vcpu *v)
 
     if ( paging_mode_enabled(v->domain) )
     {
-        paging_update_cr3(v);
+        paging_update_cr3(v, false);
         return;
     }
 
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 003c2d8..b76e6b8 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -669,10 +669,10 @@ static bool_t hap_invlpg(struct vcpu *v, unsigned long va)
     return 1;
 }
 
-static void hap_update_cr3(struct vcpu *v, int do_locking)
+static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
 {
     v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
-    hvm_update_guest_cr(v, 3);
+    hvm_update_guest_cr3(v, noflush);
 }
 
 const struct paging_mode *
@@ -708,7 +708,7 @@ static void hap_update_paging_modes(struct vcpu *v)
     }
 
     /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
-    hap_update_cr3(v, 0);
+    hap_update_cr3(v, 0, false);
 
     paging_unlock(d);
     put_gfn(d, cr3_gfn);
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index c240953..20ded3e 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3030,7 +3030,7 @@ static void sh_update_paging_modes(struct vcpu *v)
     }
 #endif /* OOS */
 
-    v->arch.paging.mode->update_cr3(v, 0);
+    v->arch.paging.mode->update_cr3(v, 0, false);
 }
 
 void shadow_update_paging_modes(struct vcpu *v)
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index a6372e3..fcc4fa3 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -3173,7 +3173,7 @@ static int sh_page_fault(struct vcpu *v,
          * In any case, in the PAE case, the ASSERT is not true; it can
          * happen because of actions the guest is taking. */
 #if GUEST_PAGING_LEVELS == 3
-        v->arch.paging.mode->update_cr3(v, 0);
+        v->arch.paging.mode->update_cr3(v, 0, false);
 #else
         ASSERT(d->is_shutting_down);
 #endif
@@ -3992,7 +3992,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
 
 
 static void
-sh_update_cr3(struct vcpu *v, int do_locking)
+sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
 /* Updates vcpu->arch.cr3 after the guest has changed CR3.
  * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
  * if appropriate).
@@ -4234,7 +4234,7 @@ sh_update_cr3(struct vcpu *v, int do_locking)
         v->arch.hvm_vcpu.hw_cr[3] =
             pagetable_get_paddr(v->arch.shadow_table[0]);
 #endif
-        hvm_update_guest_cr(v, 3);
+        hvm_update_guest_cr3(v, noflush);
     }
 
     /* Fix up the linear pagetable mappings */
diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
index 9e6ad23..a8c9604 100644
--- a/xen/arch/x86/mm/shadow/none.c
+++ b/xen/arch/x86/mm/shadow/none.c
@@ -50,7 +50,7 @@ static unsigned long _gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
     return gfn_x(INVALID_GFN);
 }
 
-static void _update_cr3(struct vcpu *v, int do_locking)
+static void _update_cr3(struct vcpu *v, int do_locking, bool noflush)
 {
     ASSERT_UNREACHABLE();
 }
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index dd3dd5f..ca3a334 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -80,6 +80,9 @@ enum hvm_intblk {
 #define HVM_EVENT_VECTOR_UNSET    (-1)
 #define HVM_EVENT_VECTOR_UPDATING (-2)
 
+/* update_guest_cr() flags. */
+#define HVM_UPDATE_GUEST_CR3_NOFLUSH 0x00000001
+
 /*
  * The hardware virtual machine (HVM) interface abstracts away from the
  * x86/x86_64 CPU virtualization assist specifics. Currently this interface
@@ -132,7 +135,8 @@ struct hvm_function_table {
     /*
      * Called to inform HVM layer that a guest CRn or EFER has changed.
      */
-    void (*update_guest_cr)(struct vcpu *v, unsigned int cr);
+    void (*update_guest_cr)(struct vcpu *v, unsigned int cr,
+                            unsigned int flags);
     void (*update_guest_efer)(struct vcpu *v);
 
     void (*cpuid_policy_changed)(struct vcpu *v);
@@ -324,7 +328,14 @@ hvm_update_host_cr3(struct vcpu *v)
 
 static inline void hvm_update_guest_cr(struct vcpu *v, unsigned int cr)
 {
-    hvm_funcs.update_guest_cr(v, cr);
+    hvm_funcs.update_guest_cr(v, cr, 0);
+}
+
+static inline void hvm_update_guest_cr3(struct vcpu *v, bool noflush)
+{
+    unsigned int flags = noflush ? HVM_UPDATE_GUEST_CR3_NOFLUSH : 0;
+
+    hvm_funcs.update_guest_cr(v, 3, flags);
 }
 
 static inline void hvm_update_guest_efer(struct vcpu *v)
diff --git a/xen/include/asm-x86/hvm/svm/svm.h b/xen/include/asm-x86/hvm/svm/svm.h
index 462cb89..6c050f5 100644
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -51,7 +51,7 @@ static inline void svm_invlpga(unsigned long vaddr, uint32_t asid)
 
 unsigned long *svm_msrbit(unsigned long *msr_bitmap, uint32_t msr);
 void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len);
-void svm_update_guest_cr(struct vcpu *, unsigned int cr);
+void svm_update_guest_cr(struct vcpu *, unsigned int cr, unsigned int flags);
 
 extern u32 svm_feature_flags;
 
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 5607ab4..dd3e31f 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -122,7 +122,8 @@ struct paging_mode {
                                             unsigned long cr3,
                                             paddr_t ga, uint32_t *pfec,
                                             unsigned int *page_order);
-    void          (*update_cr3            )(struct vcpu *v, int do_locking);
+    void          (*update_cr3            )(struct vcpu *v, int do_locking,
+                                            bool noflush);
     void          (*update_paging_modes   )(struct vcpu *v);
     void          (*write_p2m_entry       )(struct domain *d, unsigned long gfn,
                                             l1_pgentry_t *p, l1_pgentry_t new,
@@ -276,9 +277,9 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
 /* Update all the things that are derived from the guest's CR3.
  * Called when the guest changes CR3; the caller can then use v->arch.cr3
  * as the value to load into the host CR3 to schedule this vcpu */
-static inline void paging_update_cr3(struct vcpu *v)
+static inline void paging_update_cr3(struct vcpu *v, bool noflush)
 {
-    paging_get_hostmode(v)->update_cr3(v, 1);
+    paging_get_hostmode(v)->update_cr3(v, 1, noflush);
 }
 
 /* Update all the things that are derived from the guest's CR0/CR3/CR4.
diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
index 70453e8..8598ade 100644
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -43,6 +43,11 @@
 #define X86_CR0_PG              0x80000000 /* Paging                   (RW) */
 
 /*
+ * Intel CPU flags in CR3
+ */
+#define X86_CR3_NOFLUSH (_AC(1, ULL) << 63)
+
+/*
  * Intel CPU features in CR4
  */
 #define X86_CR4_VME        0x00000001 /* enable vm86 extensions */
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread