All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
@ 2018-02-16 10:22 Razvan Cojocaru
  2018-02-16 11:17 ` Jan Beulich
  2018-02-23  4:53 ` Tian, Kevin
  0 siblings, 2 replies; 13+ messages in thread
From: Razvan Cojocaru @ 2018-02-16 10:22 UTC (permalink / raw)
  To: xen-devel
  Cc: kevin.tian, tamas, suravee.suthikulpanit, Razvan Cojocaru,
	george.dunlap, andrew.cooper3, tim, jbeulich, jun.nakajima,
	boris.ostrovsky

The emulation layers of Xen lack PCID support, and as we only offer
PCID to HAP guests, all writes to CR3 are handled by hardware,
except when introspection is involved. Consequently, trying to set
CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
crashes. The workaround is to clear the noflush bit in
hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
Additionally, a bool parameter now propagates to
{svm,vmx}_update_guest_cr(), so that no flushes occur when
the bit was set.

Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
Reported-by: Bitweasil <bitweasil@cryptohaze.com>
Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Tamas K Lengyel <tamas@tklengyel.com>

---
Changes since V4:
 - Changed "else { if !(flags & HVM_UPDATE_GUEST_CR3_NO_FLUSH) ) ... }"
   to "else if ...".
 - Renamed HVM_UPDATE_GUEST_CR3_NO_FLUSH to
   HVM_UPDATE_GUEST_CR3_NOFLUSH.
 - Defined X86_CR3_NOFLUSH as (_AC(1, ULL) << 63).
---
 xen/arch/x86/hvm/hvm.c            | 13 ++++++++++---
 xen/arch/x86/hvm/monitor.c        |  3 +++
 xen/arch/x86/hvm/svm/svm.c        | 15 +++++++++------
 xen/arch/x86/hvm/vmx/vmx.c        | 18 +++++++++++-------
 xen/arch/x86/mm.c                 |  2 +-
 xen/arch/x86/mm/hap/hap.c         |  6 +++---
 xen/arch/x86/mm/shadow/common.c   |  2 +-
 xen/arch/x86/mm/shadow/multi.c    |  6 +++---
 xen/arch/x86/mm/shadow/none.c     |  2 +-
 xen/include/asm-x86/hvm/hvm.h     | 15 +++++++++++++--
 xen/include/asm-x86/hvm/svm/svm.h |  2 +-
 xen/include/asm-x86/paging.h      |  7 ++++---
 xen/include/asm-x86/x86-defns.h   |  5 +++++
 13 files changed, 65 insertions(+), 31 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 91bc3e8..8047d74 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -2297,6 +2297,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
     struct vcpu *v = current;
     struct page_info *page;
     unsigned long old = v->arch.hvm_vcpu.guest_cr[3];
+    bool noflush = false;
 
     if ( may_defer && unlikely(v->domain->arch.monitor.write_ctrlreg_enabled &
                                monitor_ctrlreg_bitmask(VM_EVENT_X86_CR3)) )
@@ -2313,6 +2314,12 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
         }
     }
 
+    if ( hvm_pcid_enabled(v) ) /* Clear the noflush bit. */
+    {
+        noflush = value & X86_CR3_NOFLUSH;
+        value &= ~X86_CR3_NOFLUSH;
+    }
+
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
          (value != v->arch.hvm_vcpu.guest_cr[3]) )
     {
@@ -2330,7 +2337,7 @@ int hvm_set_cr3(unsigned long value, bool_t may_defer)
     }
 
     v->arch.hvm_vcpu.guest_cr[3] = value;
-    paging_update_cr3(v);
+    paging_update_cr3(v, noflush);
     return X86EMUL_OKAY;
 
  bad_cr3:
@@ -4031,7 +4038,7 @@ static int hvmop_flush_tlb_all(void)
 
     /* Flush paging-mode soft state (e.g., va->gfn cache; PAE PDPE cache). */
     for_each_vcpu ( d, v )
-        paging_update_cr3(v);
+        paging_update_cr3(v, false);
 
     /* Flush all dirty TLBs. */
     flush_tlb_mask(d->dirty_cpumask);
@@ -4193,7 +4200,7 @@ static int hvmop_set_param(
         domain_pause(d);
         d->arch.hvm_domain.params[a.index] = a.value;
         for_each_vcpu ( d, v )
-            paging_update_cr3(v);
+            paging_update_cr3(v, false);
         domain_unpause(d);
 
         domctl_lock_release();
diff --git a/xen/arch/x86/hvm/monitor.c b/xen/arch/x86/hvm/monitor.c
index 5d568a3..9869f07 100644
--- a/xen/arch/x86/hvm/monitor.c
+++ b/xen/arch/x86/hvm/monitor.c
@@ -36,6 +36,9 @@ bool hvm_monitor_cr(unsigned int index, unsigned long value, unsigned long old)
     struct arch_domain *ad = &curr->domain->arch;
     unsigned int ctrlreg_bitmask = monitor_ctrlreg_bitmask(index);
 
+    if ( index == VM_EVENT_X86_CR3 && hvm_pcid_enabled(curr) )
+        value &= ~X86_CR3_NOFLUSH; /* Clear the noflush bit. */
+
     if ( (ad->monitor.write_ctrlreg_enabled & ctrlreg_bitmask) &&
          (!(ad->monitor.write_ctrlreg_onchangeonly & ctrlreg_bitmask) ||
           value != old) &&
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 9f58afc..2c62d5e 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -325,9 +325,9 @@ static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
     v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
     v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
     v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
-    svm_update_guest_cr(v, 0);
-    svm_update_guest_cr(v, 2);
-    svm_update_guest_cr(v, 4);
+    svm_update_guest_cr(v, 0, 0);
+    svm_update_guest_cr(v, 2, 0);
+    svm_update_guest_cr(v, 4, 0);
 
     /* Load sysenter MSRs into both VMCB save area and VCPU fields. */
     vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs;
@@ -543,7 +543,7 @@ static int svm_guest_x86_mode(struct vcpu *v)
     return likely(vmcb->cs.db) ? 4 : 2;
 }
 
-void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
+void svm_update_guest_cr(struct vcpu *v, unsigned int cr, unsigned int flags)
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     uint64_t value;
@@ -583,10 +583,13 @@ void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
     case 3:
         vmcb_set_cr3(vmcb, v->arch.hvm_vcpu.hw_cr[3]);
         if ( !nestedhvm_enabled(v->domain) )
-            hvm_asid_flush_vcpu(v);
+        {
+            if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
+                hvm_asid_flush_vcpu(v);
+        }
         else if ( nestedhvm_vmswitch_in_progress(v) )
             ; /* CR3 switches during VMRUN/VMEXIT do not flush the TLB. */
-        else
+        else if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
             hvm_asid_flush_vcpu_asid(
                 nestedhvm_vcpu_in_guestmode(v)
                 ? &vcpu_nestedhvm(v).nv_n2asid : &v->arch.hvm_vcpu.n1asid);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 5cd689e..e9ad04d 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -68,7 +68,8 @@ static void vmx_ctxt_switch_to(struct vcpu *v);
 static int  vmx_alloc_vlapic_mapping(struct domain *d);
 static void vmx_free_vlapic_mapping(struct domain *d);
 static void vmx_install_vlapic_mapping(struct vcpu *v);
-static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr);
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
+                                unsigned int flags);
 static void vmx_update_guest_efer(struct vcpu *v);
 static void vmx_wbinvd_intercept(void);
 static void vmx_fpu_dirty_intercept(void);
@@ -836,9 +837,9 @@ static int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 
     v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
     v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
-    vmx_update_guest_cr(v, 0);
-    vmx_update_guest_cr(v, 2);
-    vmx_update_guest_cr(v, 4);
+    vmx_update_guest_cr(v, 0, 0);
+    vmx_update_guest_cr(v, 2, 0);
+    vmx_update_guest_cr(v, 4, 0);
 
     v->arch.hvm_vcpu.guest_efer = c->msr_efer;
     vmx_update_guest_efer(v);
@@ -1548,7 +1549,8 @@ void vmx_update_debug_state(struct vcpu *v)
     vmx_vmcs_exit(v);
 }
 
-static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
+static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
+                                unsigned int flags)
 {
     vmx_vmcs_enter(v);
 
@@ -1700,7 +1702,9 @@ static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr)
         }
 
         __vmwrite(GUEST_CR3, v->arch.hvm_vcpu.hw_cr[3]);
-        hvm_asid_flush_vcpu(v);
+
+        if ( !(flags & HVM_UPDATE_GUEST_CR3_NOFLUSH) )
+            hvm_asid_flush_vcpu(v);
         break;
 
     default:
@@ -2652,7 +2656,7 @@ static int vmx_cr_access(unsigned long exit_qualification)
          */
         hvm_monitor_crX(CR0, value, old);
         curr->arch.hvm_vcpu.guest_cr[0] = value;
-        vmx_update_guest_cr(curr, 0);
+        vmx_update_guest_cr(curr, 0, 0);
         HVMTRACE_0D(CLTS);
         break;
     }
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index e1f089b..9d26a9d 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -526,7 +526,7 @@ void update_cr3(struct vcpu *v)
 
     if ( paging_mode_enabled(v->domain) )
     {
-        paging_update_cr3(v);
+        paging_update_cr3(v, false);
         return;
     }
 
diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c
index 003c2d8..b76e6b8 100644
--- a/xen/arch/x86/mm/hap/hap.c
+++ b/xen/arch/x86/mm/hap/hap.c
@@ -669,10 +669,10 @@ static bool_t hap_invlpg(struct vcpu *v, unsigned long va)
     return 1;
 }
 
-static void hap_update_cr3(struct vcpu *v, int do_locking)
+static void hap_update_cr3(struct vcpu *v, int do_locking, bool noflush)
 {
     v->arch.hvm_vcpu.hw_cr[3] = v->arch.hvm_vcpu.guest_cr[3];
-    hvm_update_guest_cr(v, 3);
+    hvm_update_guest_cr3(v, noflush);
 }
 
 const struct paging_mode *
@@ -708,7 +708,7 @@ static void hap_update_paging_modes(struct vcpu *v)
     }
 
     /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */
-    hap_update_cr3(v, 0);
+    hap_update_cr3(v, 0, false);
 
     paging_unlock(d);
     put_gfn(d, cr3_gfn);
diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
index c240953..20ded3e 100644
--- a/xen/arch/x86/mm/shadow/common.c
+++ b/xen/arch/x86/mm/shadow/common.c
@@ -3030,7 +3030,7 @@ static void sh_update_paging_modes(struct vcpu *v)
     }
 #endif /* OOS */
 
-    v->arch.paging.mode->update_cr3(v, 0);
+    v->arch.paging.mode->update_cr3(v, 0, false);
 }
 
 void shadow_update_paging_modes(struct vcpu *v)
diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c
index a6372e3..fcc4fa3 100644
--- a/xen/arch/x86/mm/shadow/multi.c
+++ b/xen/arch/x86/mm/shadow/multi.c
@@ -3173,7 +3173,7 @@ static int sh_page_fault(struct vcpu *v,
          * In any case, in the PAE case, the ASSERT is not true; it can
          * happen because of actions the guest is taking. */
 #if GUEST_PAGING_LEVELS == 3
-        v->arch.paging.mode->update_cr3(v, 0);
+        v->arch.paging.mode->update_cr3(v, 0, false);
 #else
         ASSERT(d->is_shutting_down);
 #endif
@@ -3992,7 +3992,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
 
 
 static void
-sh_update_cr3(struct vcpu *v, int do_locking)
+sh_update_cr3(struct vcpu *v, int do_locking, bool noflush)
 /* Updates vcpu->arch.cr3 after the guest has changed CR3.
  * Paravirtual guests should set v->arch.guest_table (and guest_table_user,
  * if appropriate).
@@ -4234,7 +4234,7 @@ sh_update_cr3(struct vcpu *v, int do_locking)
         v->arch.hvm_vcpu.hw_cr[3] =
             pagetable_get_paddr(v->arch.shadow_table[0]);
 #endif
-        hvm_update_guest_cr(v, 3);
+        hvm_update_guest_cr3(v, noflush);
     }
 
     /* Fix up the linear pagetable mappings */
diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c
index 9e6ad23..a8c9604 100644
--- a/xen/arch/x86/mm/shadow/none.c
+++ b/xen/arch/x86/mm/shadow/none.c
@@ -50,7 +50,7 @@ static unsigned long _gva_to_gfn(struct vcpu *v, struct p2m_domain *p2m,
     return gfn_x(INVALID_GFN);
 }
 
-static void _update_cr3(struct vcpu *v, int do_locking)
+static void _update_cr3(struct vcpu *v, int do_locking, bool noflush)
 {
     ASSERT_UNREACHABLE();
 }
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index dd3dd5f..ca3a334 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -80,6 +80,9 @@ enum hvm_intblk {
 #define HVM_EVENT_VECTOR_UNSET    (-1)
 #define HVM_EVENT_VECTOR_UPDATING (-2)
 
+/* update_guest_cr() flags. */
+#define HVM_UPDATE_GUEST_CR3_NOFLUSH 0x00000001
+
 /*
  * The hardware virtual machine (HVM) interface abstracts away from the
  * x86/x86_64 CPU virtualization assist specifics. Currently this interface
@@ -132,7 +135,8 @@ struct hvm_function_table {
     /*
      * Called to inform HVM layer that a guest CRn or EFER has changed.
      */
-    void (*update_guest_cr)(struct vcpu *v, unsigned int cr);
+    void (*update_guest_cr)(struct vcpu *v, unsigned int cr,
+                            unsigned int flags);
     void (*update_guest_efer)(struct vcpu *v);
 
     void (*cpuid_policy_changed)(struct vcpu *v);
@@ -324,7 +328,14 @@ hvm_update_host_cr3(struct vcpu *v)
 
 static inline void hvm_update_guest_cr(struct vcpu *v, unsigned int cr)
 {
-    hvm_funcs.update_guest_cr(v, cr);
+    hvm_funcs.update_guest_cr(v, cr, 0);
+}
+
+static inline void hvm_update_guest_cr3(struct vcpu *v, bool noflush)
+{
+    unsigned int flags = noflush ? HVM_UPDATE_GUEST_CR3_NOFLUSH : 0;
+
+    hvm_funcs.update_guest_cr(v, 3, flags);
 }
 
 static inline void hvm_update_guest_efer(struct vcpu *v)
diff --git a/xen/include/asm-x86/hvm/svm/svm.h b/xen/include/asm-x86/hvm/svm/svm.h
index 462cb89..6c050f5 100644
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -51,7 +51,7 @@ static inline void svm_invlpga(unsigned long vaddr, uint32_t asid)
 
 unsigned long *svm_msrbit(unsigned long *msr_bitmap, uint32_t msr);
 void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len);
-void svm_update_guest_cr(struct vcpu *, unsigned int cr);
+void svm_update_guest_cr(struct vcpu *, unsigned int cr, unsigned int flags);
 
 extern u32 svm_feature_flags;
 
diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
index 5607ab4..dd3e31f 100644
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -122,7 +122,8 @@ struct paging_mode {
                                             unsigned long cr3,
                                             paddr_t ga, uint32_t *pfec,
                                             unsigned int *page_order);
-    void          (*update_cr3            )(struct vcpu *v, int do_locking);
+    void          (*update_cr3            )(struct vcpu *v, int do_locking,
+                                            bool noflush);
     void          (*update_paging_modes   )(struct vcpu *v);
     void          (*write_p2m_entry       )(struct domain *d, unsigned long gfn,
                                             l1_pgentry_t *p, l1_pgentry_t new,
@@ -276,9 +277,9 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
 /* Update all the things that are derived from the guest's CR3.
  * Called when the guest changes CR3; the caller can then use v->arch.cr3
  * as the value to load into the host CR3 to schedule this vcpu */
-static inline void paging_update_cr3(struct vcpu *v)
+static inline void paging_update_cr3(struct vcpu *v, bool noflush)
 {
-    paging_get_hostmode(v)->update_cr3(v, 1);
+    paging_get_hostmode(v)->update_cr3(v, 1, noflush);
 }
 
 /* Update all the things that are derived from the guest's CR0/CR3/CR4.
diff --git a/xen/include/asm-x86/x86-defns.h b/xen/include/asm-x86/x86-defns.h
index 70453e8..8598ade 100644
--- a/xen/include/asm-x86/x86-defns.h
+++ b/xen/include/asm-x86/x86-defns.h
@@ -43,6 +43,11 @@
 #define X86_CR0_PG              0x80000000 /* Paging                   (RW) */
 
 /*
+ * Intel CPU flags in CR3
+ */
+#define X86_CR3_NOFLUSH (_AC(1, ULL) << 63)
+
+/*
  * Intel CPU features in CR4
  */
 #define X86_CR4_VME        0x00000001 /* enable vm86 extensions */
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-16 10:22 [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set Razvan Cojocaru
@ 2018-02-16 11:17 ` Jan Beulich
  2018-02-19  8:48   ` Razvan Cojocaru
  2018-02-23  4:53 ` Tian, Kevin
  1 sibling, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2018-02-16 11:17 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, tamas, suravee.suthikulpanit, george.dunlap,
	andrew.cooper3, tim, xen-devel, jun.nakajima, boris.ostrovsky

>>> On 16.02.18 at 11:22, <rcojocaru@bitdefender.com> wrote:
> The emulation layers of Xen lack PCID support, and as we only offer
> PCID to HAP guests, all writes to CR3 are handled by hardware,
> except when introspection is involved. Consequently, trying to set
> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
> crashes. The workaround is to clear the noflush bit in
> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
> Additionally, a bool parameter now propagates to
> {svm,vmx}_update_guest_cr(), so that no flushes occur when
> the bit was set.
> 
> Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
> Reported-by: Bitweasil <bitweasil@cryptohaze.com>
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Acked-by: Tamas K Lengyel <tamas@tklengyel.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-16 11:17 ` Jan Beulich
@ 2018-02-19  8:48   ` Razvan Cojocaru
  2018-02-19  8:53     ` Jan Beulich
  0 siblings, 1 reply; 13+ messages in thread
From: Razvan Cojocaru @ 2018-02-19  8:48 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, tamas, suravee.suthikulpanit, george.dunlap,
	andrew.cooper3, tim, xen-devel, jun.nakajima, boris.ostrovsky

On 02/16/2018 01:17 PM, Jan Beulich wrote:
>>>> On 16.02.18 at 11:22, <rcojocaru@bitdefender.com> wrote:
>> The emulation layers of Xen lack PCID support, and as we only offer
>> PCID to HAP guests, all writes to CR3 are handled by hardware,
>> except when introspection is involved. Consequently, trying to set
>> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
>> crashes. The workaround is to clear the noflush bit in
>> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
>> Additionally, a bool parameter now propagates to
>> {svm,vmx}_update_guest_cr(), so that no flushes occur when
>> the bit was set.
>>
>> Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
>> Reported-by: Bitweasil <bitweasil@cryptohaze.com>
>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> Acked-by: Tamas K Lengyel <tamas@tklengyel.com>
> 
> Reviewed-by: Jan Beulich <jbeulich@suse.com>

Should I rebase this because of commit
24470b99c1671dca531c2cf5747eda2f8892ecbc?


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-19  8:48   ` Razvan Cojocaru
@ 2018-02-19  8:53     ` Jan Beulich
  2018-02-19  8:59       ` Razvan Cojocaru
  0 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2018-02-19  8:53 UTC (permalink / raw)
  To: Razvan Cojocaru
  Cc: kevin.tian, tamas, suravee.suthikulpanit, george.dunlap,
	andrew.cooper3, tim, xen-devel, jun.nakajima, boris.ostrovsky

>>> On 19.02.18 at 09:48, <rcojocaru@bitdefender.com> wrote:
> On 02/16/2018 01:17 PM, Jan Beulich wrote:
>>>>> On 16.02.18 at 11:22, <rcojocaru@bitdefender.com> wrote:
>>> The emulation layers of Xen lack PCID support, and as we only offer
>>> PCID to HAP guests, all writes to CR3 are handled by hardware,
>>> except when introspection is involved. Consequently, trying to set
>>> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
>>> crashes. The workaround is to clear the noflush bit in
>>> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
>>> Additionally, a bool parameter now propagates to
>>> {svm,vmx}_update_guest_cr(), so that no flushes occur when
>>> the bit was set.
>>>
>>> Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
>>> Reported-by: Bitweasil <bitweasil@cryptohaze.com>
>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>> Acked-by: Tamas K Lengyel <tamas@tklengyel.com>
>> 
>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> 
> Should I rebase this because of commit
> 24470b99c1671dca531c2cf5747eda2f8892ecbc?

Well, if that change introduces some conflict with yours, then
generally the (obvious) answer is "yes". Considering that your
patch doesn't have all necessary acks yet, whether you wait
until you have those is up to you (as alternatively there may be
further requests for changes to make).

Jan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-19  8:53     ` Jan Beulich
@ 2018-02-19  8:59       ` Razvan Cojocaru
  0 siblings, 0 replies; 13+ messages in thread
From: Razvan Cojocaru @ 2018-02-19  8:59 UTC (permalink / raw)
  To: Jan Beulich
  Cc: kevin.tian, tamas, suravee.suthikulpanit, george.dunlap,
	andrew.cooper3, tim, xen-devel, jun.nakajima, boris.ostrovsky

On 02/19/2018 10:53 AM, Jan Beulich wrote:
>>>> On 19.02.18 at 09:48, <rcojocaru@bitdefender.com> wrote:
>> On 02/16/2018 01:17 PM, Jan Beulich wrote:
>>>>>> On 16.02.18 at 11:22, <rcojocaru@bitdefender.com> wrote:
>>>> The emulation layers of Xen lack PCID support, and as we only offer
>>>> PCID to HAP guests, all writes to CR3 are handled by hardware,
>>>> except when introspection is involved. Consequently, trying to set
>>>> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
>>>> crashes. The workaround is to clear the noflush bit in
>>>> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
>>>> Additionally, a bool parameter now propagates to
>>>> {svm,vmx}_update_guest_cr(), so that no flushes occur when
>>>> the bit was set.
>>>>
>>>> Signed-off-by: Razvan Cojocaru <rcojocaru@bitdefender.com>
>>>> Reported-by: Bitweasil <bitweasil@cryptohaze.com>
>>>> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>> Acked-by: Tamas K Lengyel <tamas@tklengyel.com>
>>>
>>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>>
>> Should I rebase this because of commit
>> 24470b99c1671dca531c2cf5747eda2f8892ecbc?
> 
> Well, if that change introduces some conflict with yours, then
> generally the (obvious) answer is "yes". Considering that your
> patch doesn't have all necessary acks yet, whether you wait
> until you have those is up to you (as alternatively there may be
> further requests for changes to make).

Right, sorry for being ambiguous - definitely no conflicts, but I had
assumed (wrongly, as it turns out) that applying it would now require
human intervention. It doesn't.

Sorry for the noise.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-16 10:22 [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set Razvan Cojocaru
  2018-02-16 11:17 ` Jan Beulich
@ 2018-02-23  4:53 ` Tian, Kevin
  2018-02-23  7:29   ` Razvan Cojocaru
  1 sibling, 1 reply; 13+ messages in thread
From: Tian, Kevin @ 2018-02-23  4:53 UTC (permalink / raw)
  To: Razvan Cojocaru, xen-devel
  Cc: tamas, suravee.suthikulpanit, george.dunlap, andrew.cooper3, tim,
	jbeulich, Nakajima, Jun, boris.ostrovsky

> From: Razvan Cojocaru [mailto:rcojocaru@bitdefender.com]
> Sent: Friday, February 16, 2018 6:22 PM
> 
> The emulation layers of Xen lack PCID support, and as we only offer
> PCID to HAP guests, all writes to CR3 are handled by hardware,
> except when introspection is involved. Consequently, trying to set
> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
> crashes. The workaround is to clear the noflush bit in
> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
> Additionally, a bool parameter now propagates to
> {svm,vmx}_update_guest_cr(), so that no flushes occur when
> the bit was set.

Above message is not very clear for people who didn't follow
previous discussions, e.g. why lacking PCID support in emulation 
layer would lead to domain crash? and why noflush trick can 
avoid the situation? Can you help elaborate it?

btw I didn't see any place setting the new macro 
(X86_CR3_NOFLUSH). just check and clear. 

Thanks
Kevin

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-23  4:53 ` Tian, Kevin
@ 2018-02-23  7:29   ` Razvan Cojocaru
  2018-02-23  7:31     ` Razvan Cojocaru
  2018-02-27 15:53     ` George Dunlap
  0 siblings, 2 replies; 13+ messages in thread
From: Razvan Cojocaru @ 2018-02-23  7:29 UTC (permalink / raw)
  To: Tian, Kevin, xen-devel
  Cc: tamas, suravee.suthikulpanit, george.dunlap, andrew.cooper3, tim,
	jbeulich, Nakajima, Jun, boris.ostrovsky

On 02/23/2018 06:53 AM, Tian, Kevin wrote:
>> From: Razvan Cojocaru [mailto:rcojocaru@bitdefender.com]
>> Sent: Friday, February 16, 2018 6:22 PM
>>
>> The emulation layers of Xen lack PCID support, and as we only offer
>> PCID to HAP guests, all writes to CR3 are handled by hardware,
>> except when introspection is involved. Consequently, trying to set
>> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
>> crashes. The workaround is to clear the noflush bit in
>> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
>> Additionally, a bool parameter now propagates to
>> {svm,vmx}_update_guest_cr(), so that no flushes occur when
>> the bit was set.
> 
> Above message is not very clear for people who didn't follow
> previous discussions, e.g. why lacking PCID support in emulation 
> layer would lead to domain crash? and why noflush trick can 
> avoid the situation? Can you help elaborate it?

Lacking PCID support in the emulation layer creates two different way of
handling the NOFLUSH being set: one is in hardware, and this happens for
everything except the introspection case, and one in the emulation layer
(this happens when an introspection agent asks Xen to emulate an
instruction when it replies to an EPT fault vm_event).

The checks in place expected the guest state to be correct with regard
to handling the bit being set "the hardware way", but the emulation
layer was, previous to this patch, completely ignoring the NOFLUSH bit.
Hence, there was a difference between the expected domain state and the
actual domain state, which translated into a domain crash.

This patch does the work required by the NOFLUSH bit being set (i.e. it
avoids the flush when setting CR3), and then clears the bit ensuring
that the final state passes the Xen check.

> btw I didn't see any place setting the new macro 
> (X86_CR3_NOFLUSH). just check and clear.

Xen doesn't set it, the guest OS does.


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-23  7:29   ` Razvan Cojocaru
@ 2018-02-23  7:31     ` Razvan Cojocaru
  2018-02-24  2:35       ` Tian, Kevin
  2018-02-27 15:53     ` George Dunlap
  1 sibling, 1 reply; 13+ messages in thread
From: Razvan Cojocaru @ 2018-02-23  7:31 UTC (permalink / raw)
  To: Tian, Kevin, xen-devel
  Cc: tamas, suravee.suthikulpanit, george.dunlap, andrew.cooper3, tim,
	jbeulich, Nakajima, Jun, boris.ostrovsky

On 02/23/2018 09:29 AM, Razvan Cojocaru wrote:
> Lacking PCID support in the emulation layer creates two different way of
> handling the NOFLUSH being set: one is in hardware, and this happens for
> everything except the introspection case, and one in the emulation layer
> (this happens when an introspection agent asks Xen to emulate an
> instruction when it replies to an EPT fault vm_event).

Sorry, not when the introspection agent asks Xen to emulate an
instruction when it replies to an EPT fault vm_event, but when the
introspection agent wants to be able to veto a CR3 write - i.e. when the
introspection agent subscribes to CR3 write events.


Thanks,
Razvan


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-23  7:31     ` Razvan Cojocaru
@ 2018-02-24  2:35       ` Tian, Kevin
  0 siblings, 0 replies; 13+ messages in thread
From: Tian, Kevin @ 2018-02-24  2:35 UTC (permalink / raw)
  To: Razvan Cojocaru, xen-devel
  Cc: tamas, suravee.suthikulpanit, george.dunlap, andrew.cooper3, tim,
	jbeulich, Nakajima, Jun, boris.ostrovsky

> From: Razvan Cojocaru [mailto:rcojocaru@bitdefender.com]
> Sent: Friday, February 23, 2018 3:32 PM
> 
> On 02/23/2018 09:29 AM, Razvan Cojocaru wrote:
> > Lacking PCID support in the emulation layer creates two different way of
> > handling the NOFLUSH being set: one is in hardware, and this happens for
> > everything except the introspection case, and one in the emulation layer
> > (this happens when an introspection agent asks Xen to emulate an
> > instruction when it replies to an EPT fault vm_event).
> 
> Sorry, not when the introspection agent asks Xen to emulate an
> instruction when it replies to an EPT fault vm_event, but when the
> introspection agent wants to be able to veto a CR3 write - i.e. when the
> introspection agent subscribes to CR3 write events.
> 

yes, that's what I wondered when reading your 1st reply. 

Now I understood the background and the patch overall looks
good to me. 

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-23  7:29   ` Razvan Cojocaru
  2018-02-23  7:31     ` Razvan Cojocaru
@ 2018-02-27 15:53     ` George Dunlap
  2018-02-27 16:19       ` Razvan Cojocaru
  1 sibling, 1 reply; 13+ messages in thread
From: George Dunlap @ 2018-02-27 15:53 UTC (permalink / raw)
  To: Razvan Cojocaru, Tian, Kevin, xen-devel
  Cc: tamas, suravee.suthikulpanit, george.dunlap, andrew.cooper3, tim,
	jbeulich, Nakajima, Jun, boris.ostrovsky

On 02/23/2018 07:29 AM, Razvan Cojocaru wrote:
> On 02/23/2018 06:53 AM, Tian, Kevin wrote:
>>> From: Razvan Cojocaru [mailto:rcojocaru@bitdefender.com]
>>> Sent: Friday, February 16, 2018 6:22 PM
>>>
>>> The emulation layers of Xen lack PCID support, and as we only offer
>>> PCID to HAP guests, all writes to CR3 are handled by hardware,
>>> except when introspection is involved. Consequently, trying to set
>>> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
>>> crashes. The workaround is to clear the noflush bit in
>>> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
>>> Additionally, a bool parameter now propagates to
>>> {svm,vmx}_update_guest_cr(), so that no flushes occur when
>>> the bit was set.
>>
>> Above message is not very clear for people who didn't follow
>> previous discussions, e.g. why lacking PCID support in emulation 
>> layer would lead to domain crash? and why noflush trick can 
>> avoid the situation? Can you help elaborate it?
> 
> Lacking PCID support in the emulation layer creates two different way of
> handling the NOFLUSH being set: one is in hardware, and this happens for
> everything except the introspection case, and one in the emulation layer
> (this happens when an introspection agent asks Xen to emulate an
> instruction when it replies to an EPT fault vm_event).
> 
> The checks in place expected the guest state to be correct with regard
> to handling the bit being set "the hardware way", but the emulation
> layer was, previous to this patch, completely ignoring the NOFLUSH bit.
> Hence, there was a difference between the expected domain state and the
> actual domain state, which translated into a domain crash.

Just getting up to speed on this -- is it the case that the hardware
will automatically clear this bit; so because it wasn't being cleared in
hvm_set_cr3() during emulation, one of the checks in Xen (on exiting the
emulator?) was failing due to the bit being set, and then crashing the
domain?

In which case you're not so much fixing a domain crash by providing a
workaround, as actually implementing a bit of functionality.

If so I think the commit message could use expanding.  What about
something like the following (assuming I haven't misunderstood what's
going on):

---
In hardware, when PCID support is enabled and the X86_CR3_NOFLUSH bit is
set when writing a CR3 value, the hardware will clear that that bit and
change the CR3 without flushing the TLB.  hvm_set_cr3(), however, was
ignoring this bit; the result was that post-emulation checks detected an
invalid CR3 value and crashed the domain.

Handle X86_CR3_NOFLUSH in hvm_set_cr3() by:
1. Clearing the bit
2. Passing a "noflush" flag to lower-level cr3 setting functions to
indicate that a flush should not be performed.

Also clear X86_CR3_NOFLUSH when reporting CR3 monitored CR3 writes.

This allows introspection to be used on VMs whose operating system uses
the NOFLUSH bit.
---

As an aside -- are you sure clearing the NOFLUSH from reported CR3
values during introspection is the right thing to do?  You don't think
your introspection engine will ever want to know if the guest OS is
setting this bit?

In any case, with the updated commit message:

Acked-by: George Dunlap <george.dunlap@citrix.com>

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-27 15:53     ` George Dunlap
@ 2018-02-27 16:19       ` Razvan Cojocaru
  2018-02-27 16:26         ` George Dunlap
  0 siblings, 1 reply; 13+ messages in thread
From: Razvan Cojocaru @ 2018-02-27 16:19 UTC (permalink / raw)
  To: George Dunlap, Tian, Kevin, xen-devel
  Cc: Nakajima, Jun, tamas, suravee.suthikulpanit, george.dunlap,
	andrew.cooper3, tim, jbeulich, boris.ostrovsky

On 02/27/2018 05:53 PM, George Dunlap wrote:
> On 02/23/2018 07:29 AM, Razvan Cojocaru wrote:
>> On 02/23/2018 06:53 AM, Tian, Kevin wrote:
>>>> From: Razvan Cojocaru [mailto:rcojocaru@bitdefender.com]
>>>> Sent: Friday, February 16, 2018 6:22 PM
>>>>
>>>> The emulation layers of Xen lack PCID support, and as we only offer
>>>> PCID to HAP guests, all writes to CR3 are handled by hardware,
>>>> except when introspection is involved. Consequently, trying to set
>>>> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
>>>> crashes. The workaround is to clear the noflush bit in
>>>> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
>>>> Additionally, a bool parameter now propagates to
>>>> {svm,vmx}_update_guest_cr(), so that no flushes occur when
>>>> the bit was set.
>>>
>>> Above message is not very clear for people who didn't follow
>>> previous discussions, e.g. why lacking PCID support in emulation 
>>> layer would lead to domain crash? and why noflush trick can 
>>> avoid the situation? Can you help elaborate it?
>>
>> Lacking PCID support in the emulation layer creates two different way of
>> handling the NOFLUSH being set: one is in hardware, and this happens for
>> everything except the introspection case, and one in the emulation layer
>> (this happens when an introspection agent asks Xen to emulate an
>> instruction when it replies to an EPT fault vm_event).
>>
>> The checks in place expected the guest state to be correct with regard
>> to handling the bit being set "the hardware way", but the emulation
>> layer was, previous to this patch, completely ignoring the NOFLUSH bit.
>> Hence, there was a difference between the expected domain state and the
>> actual domain state, which translated into a domain crash.
> 
> Just getting up to speed on this -- is it the case that the hardware
> will automatically clear this bit; so because it wasn't being cleared in
> hvm_set_cr3() during emulation, one of the checks in Xen (on exiting the
> emulator?) was failing due to the bit being set, and then crashing the
> domain?

Yes, I believe that that is what happens. The check is done on VMENTRY,
this is the original email reporting the issue showing it:

https://lists.xenproject.org/archives/html/xen-devel/2018-01/msg02275.html

> In which case you're not so much fixing a domain crash by providing a
> workaround, as actually implementing a bit of functionality.
> 
> If so I think the commit message could use expanding.  What about
> something like the following (assuming I haven't misunderstood what's
> going on):
> 
> ---
> In hardware, when PCID support is enabled and the X86_CR3_NOFLUSH bit is
> set when writing a CR3 value, the hardware will clear that that bit and
> change the CR3 without flushing the TLB.  hvm_set_cr3(), however, was
> ignoring this bit; the result was that post-emulation checks detected an
> invalid CR3 value and crashed the domain.
> 
> Handle X86_CR3_NOFLUSH in hvm_set_cr3() by:
> 1. Clearing the bit
> 2. Passing a "noflush" flag to lower-level cr3 setting functions to
> indicate that a flush should not be performed.
> 
> Also clear X86_CR3_NOFLUSH when reporting CR3 monitored CR3 writes.
> 
> This allows introspection to be used on VMs whose operating system uses
> the NOFLUSH bit.
> ---

Fair enough, I'm happy to change the commit message if nobody else
objects / has other changes in mind.

> As an aside -- are you sure clearing the NOFLUSH from reported CR3
> values during introspection is the right thing to do?  You don't think
> your introspection engine will ever want to know if the guest OS is
> setting this bit?

We can't be sure this will never be useful to know, but at least for now
I've not seen any requests to be able to, and our introspection engine
is not interested in the information (in fact, one of the reasons why
we've even missed the problem until it's been reported is that we
haven't even been subscribing to CR3 write events for a while now).

So as far as we're concerned, losing the information about the NOFLUSH
bit is no problem at all (and it's definitely preferable to a domain
crash). Since Tamas has acked the patch, it's safe to assume that they
have no problem with it either, and Bitweasil seemed happy with the
solution as well.

I suppose we can always write a patch later if it turns out that this is
valuable information.

> In any case, with the updated commit message:
> 
> Acked-by: George Dunlap <george.dunlap@citrix.com>

Thank you very much for the review!


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-27 16:19       ` Razvan Cojocaru
@ 2018-02-27 16:26         ` George Dunlap
  2018-02-27 17:04           ` Razvan Cojocaru
  0 siblings, 1 reply; 13+ messages in thread
From: George Dunlap @ 2018-02-27 16:26 UTC (permalink / raw)
  To: Razvan Cojocaru, Tian, Kevin, xen-devel
  Cc: Nakajima, Jun, tamas, suravee.suthikulpanit, george.dunlap,
	andrew.cooper3, tim, jbeulich, boris.ostrovsky

On 02/27/2018 04:19 PM, Razvan Cojocaru wrote:
> On 02/27/2018 05:53 PM, George Dunlap wrote:
>> On 02/23/2018 07:29 AM, Razvan Cojocaru wrote:
>>> On 02/23/2018 06:53 AM, Tian, Kevin wrote:
>>>>> From: Razvan Cojocaru [mailto:rcojocaru@bitdefender.com]
>>>>> Sent: Friday, February 16, 2018 6:22 PM
>>>>>
>>>>> The emulation layers of Xen lack PCID support, and as we only offer
>>>>> PCID to HAP guests, all writes to CR3 are handled by hardware,
>>>>> except when introspection is involved. Consequently, trying to set
>>>>> CR3 when the noflush bit is set in hvm_set_cr3() leads to domain
>>>>> crashes. The workaround is to clear the noflush bit in
>>>>> hvm_set_cr3(). CR3 values in hvm_monitor_cr() are also sanitized.
>>>>> Additionally, a bool parameter now propagates to
>>>>> {svm,vmx}_update_guest_cr(), so that no flushes occur when
>>>>> the bit was set.
>>>>
>>>> Above message is not very clear for people who didn't follow
>>>> previous discussions, e.g. why lacking PCID support in emulation 
>>>> layer would lead to domain crash? and why noflush trick can 
>>>> avoid the situation? Can you help elaborate it?
>>>
>>> Lacking PCID support in the emulation layer creates two different way of
>>> handling the NOFLUSH being set: one is in hardware, and this happens for
>>> everything except the introspection case, and one in the emulation layer
>>> (this happens when an introspection agent asks Xen to emulate an
>>> instruction when it replies to an EPT fault vm_event).
>>>
>>> The checks in place expected the guest state to be correct with regard
>>> to handling the bit being set "the hardware way", but the emulation
>>> layer was, previous to this patch, completely ignoring the NOFLUSH bit.
>>> Hence, there was a difference between the expected domain state and the
>>> actual domain state, which translated into a domain crash.
>>
>> Just getting up to speed on this -- is it the case that the hardware
>> will automatically clear this bit; so because it wasn't being cleared in
>> hvm_set_cr3() during emulation, one of the checks in Xen (on exiting the
>> emulator?) was failing due to the bit being set, and then crashing the
>> domain?
> 
> Yes, I believe that that is what happens. The check is done on VMENTRY,
> this is the original email reporting the issue showing it:
> 
> https://lists.xenproject.org/archives/html/xen-devel/2018-01/msg02275.html
> 
>> In which case you're not so much fixing a domain crash by providing a
>> workaround, as actually implementing a bit of functionality.
>>
>> If so I think the commit message could use expanding.  What about
>> something like the following (assuming I haven't misunderstood what's
>> going on):
>>
>> ---
>> In hardware, when PCID support is enabled and the X86_CR3_NOFLUSH bit is
>> set when writing a CR3 value, the hardware will clear that that bit and
>> change the CR3 without flushing the TLB.  hvm_set_cr3(), however, was
>> ignoring this bit; the result was that post-emulation checks detected an
>> invalid CR3 value and crashed the domain.
>>
>> Handle X86_CR3_NOFLUSH in hvm_set_cr3() by:
>> 1. Clearing the bit
>> 2. Passing a "noflush" flag to lower-level cr3 setting functions to
>> indicate that a flush should not be performed.
>>
>> Also clear X86_CR3_NOFLUSH when reporting CR3 monitored CR3 writes.
>>
>> This allows introspection to be used on VMs whose operating system uses
>> the NOFLUSH bit.
>> ---
> 
> Fair enough, I'm happy to change the commit message if nobody else
> objects / has other changes in mind.
> 
>> As an aside -- are you sure clearing the NOFLUSH from reported CR3
>> values during introspection is the right thing to do?  You don't think
>> your introspection engine will ever want to know if the guest OS is
>> setting this bit?
> 
> We can't be sure this will never be useful to know, but at least for now
> I've not seen any requests to be able to, and our introspection engine
> is not interested in the information (in fact, one of the reasons why
> we've even missed the problem until it's been reported is that we
> haven't even been subscribing to CR3 write events for a while now).
> 
> So as far as we're concerned, losing the information about the NOFLUSH
> bit is no problem at all (and it's definitely preferable to a domain
> crash). Since Tamas has acked the patch, it's safe to assume that they
> have no problem with it either, and Bitweasil seemed happy with the
> solution as well.
> 
> I suppose we can always write a patch later if it turns out that this is
> valuable information.

Well if you want to maintain backwards compatibility, you'd need to
either have the bit opt-in, or pass the noflush bit back somewhere else
(either with a flag or with a different part of the struct).

If everyone is happy with it I don't mind.

 -George

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set
  2018-02-27 16:26         ` George Dunlap
@ 2018-02-27 17:04           ` Razvan Cojocaru
  0 siblings, 0 replies; 13+ messages in thread
From: Razvan Cojocaru @ 2018-02-27 17:04 UTC (permalink / raw)
  To: George Dunlap, Tian, Kevin, xen-devel
  Cc: Nakajima, Jun, tamas, suravee.suthikulpanit, george.dunlap,
	andrew.cooper3, tim, jbeulich, boris.ostrovsky

On 02/27/2018 06:26 PM, George Dunlap wrote:
>>> As an aside -- are you sure clearing the NOFLUSH from reported CR3
>>> values during introspection is the right thing to do?  You don't think
>>> your introspection engine will ever want to know if the guest OS is
>>> setting this bit?
>>
>> We can't be sure this will never be useful to know, but at least for now
>> I've not seen any requests to be able to, and our introspection engine
>> is not interested in the information (in fact, one of the reasons why
>> we've even missed the problem until it's been reported is that we
>> haven't even been subscribing to CR3 write events for a while now).
>>
>> So as far as we're concerned, losing the information about the NOFLUSH
>> bit is no problem at all (and it's definitely preferable to a domain
>> crash). Since Tamas has acked the patch, it's safe to assume that they
>> have no problem with it either, and Bitweasil seemed happy with the
>> solution as well.
>>
>> I suppose we can always write a patch later if it turns out that this is
>> valuable information.
> 
> Well if you want to maintain backwards compatibility, you'd need to
> either have the bit opt-in, or pass the noflush bit back somewhere else
> (either with a flag or with a different part of the struct).
> 
> If everyone is happy with it I don't mind.

A bool-like .noflush field could be nice, except it would only apply to
CR3 but affect both the common CR structure:

243 struct vm_event_write_ctrlreg {
244     uint32_t index;
245     uint32_t _pad;
246     uint64_t new_value;
247     uint64_t old_value;
248 };

and the common CR monitor function:

 33 bool hvm_monitor_cr(unsigned int index, unsigned long value,
unsigned long old)
 34 {
 35     struct vcpu *curr = current;
 36     struct arch_domain *ad = &curr->domain->arch;
 37     unsigned int ctrlreg_bitmask = monitor_ctrlreg_bitmask(index);
 38
 39     if ( index == VM_EVENT_X86_CR3 && hvm_pcid_enabled(curr) )
 40         value &= ~X86_CR3_NOFLUSH; /* Clear the noflush bit. */
 41
 42     if ( (ad->monitor.write_ctrlreg_enabled & ctrlreg_bitmask) &&
 43          (!(ad->monitor.write_ctrlreg_onchangeonly & ctrlreg_bitmask) ||
 44           value != old) &&
 45          ((value ^ old) & ~ad->monitor.write_ctrlreg_mask[index]) )
 46     {
 47         bool sync = ad->monitor.write_ctrlreg_sync & ctrlreg_bitmask;
 48
 49         vm_event_request_t req = {
 50             .reason = VM_EVENT_REASON_WRITE_CTRLREG,
 51             .u.write_ctrlreg.index = index,
 52             .u.write_ctrlreg.new_value = value,
 53             .u.write_ctrlreg.old_value = old
 54         };
 55
 56         if ( monitor_traps(curr, sync, &req) >= 0 )
 57             return 1;
 58     }
 59
 60     return 0;
 61 }

There's the additional problem of old vs. new values, as compared in the
above code: the way things work, the previous (old) value will always be
NOFLUSH-free (since we clear the NOFLUSH bit before storing). That means
that a NOFLUSH-set new value, identical to the old one except for the
NOFLUSH bit will trigger an "onchangeonly" event - which we don't want,
since the actual values are really identical (it's just that the NOFLUSH
bit has been removed before storing previously).

For example: previously we had (0x1 | NOFLUSH). We didn't flush, and
we've cleared the NOFLUSH bit, and now old value == 0x1.

CR3 is now being set again to (0x1 | NOFLUSH). Compared to the old
value, it's different, but is it? Was the previous value simply 0x1? Or
was it (0x1 | NOFLUSH)?


Thanks,
Razvan

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-02-27 17:04 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-16 10:22 [PATCH V5] x86/hvm: fix domain crash when CR3 has the noflush bit set Razvan Cojocaru
2018-02-16 11:17 ` Jan Beulich
2018-02-19  8:48   ` Razvan Cojocaru
2018-02-19  8:53     ` Jan Beulich
2018-02-19  8:59       ` Razvan Cojocaru
2018-02-23  4:53 ` Tian, Kevin
2018-02-23  7:29   ` Razvan Cojocaru
2018-02-23  7:31     ` Razvan Cojocaru
2018-02-24  2:35       ` Tian, Kevin
2018-02-27 15:53     ` George Dunlap
2018-02-27 16:19       ` Razvan Cojocaru
2018-02-27 16:26         ` George Dunlap
2018-02-27 17:04           ` Razvan Cojocaru

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.