* [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
@ 2013-10-21 15:55 Liu, Jinsong
  2013-10-22 14:55 ` Jan Beulich
                   ` (3 more replies)
  0 siblings, 4 replies; 22+ messages in thread
From: Liu, Jinsong @ 2013-10-21 15:55 UTC (permalink / raw)
  To: Jan Beulich, Tim Deegan, Andrew Cooper
  Cc: Keir Fraser, Nakajima, Jun, zhenzhong.duan, xen-devel, Auld,
	Will, suravee.suthikulpanit, sherry.hurwitz

From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
From: Liu Jinsong <jinsong.liu@intel.com>
Date: Thu, 17 Oct 2013 05:49:23 +0800
Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling

This patch solves the XSA-60 security hole:
1. For a guest w/o VT-d, and for a guest with VT-d but snooped, Xen needs to
do nothing, since the hardware snoop mechanism has ensured cache coherency.

2. For a guest with VT-d but non-snooped, cache coherency cannot be
guaranteed by h/w snoop, therefore Xen needs to emulate UC type to the guest:
2.1). if it works w/ Intel EPT, set the guest IA32_PAT fields as UC so that
guest memory types are all UC.
2.2). if it works w/ shadow, drop all shadows so that any new ones would
be created on demand w/ UC.

This patch also fixes a bug in shadow cr0.cd handling. The current shadow code
has a small window between cache flush and TLB invalidation, resulting in
possible cache pollution. This patch pauses the vcpus so that no vcpu context
gets involved in the window.

Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
---
 xen/arch/x86/hvm/hvm.c            |   75 ++++++++++++++++++++++--------------
 xen/arch/x86/hvm/vmx/vmcs.c       |    2 +-
 xen/arch/x86/hvm/vmx/vmx.c        |   58 +++++++++++++++++++++++++++-
 xen/common/domain.c               |   10 +++++
 xen/include/asm-x86/hvm/hvm.h     |    1 +
 xen/include/asm-x86/hvm/support.h |    2 +
 xen/include/xen/sched.h           |    1 +
 7 files changed, 117 insertions(+), 32 deletions(-)

diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index 688a943..df021de 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -1701,6 +1701,40 @@ int hvm_mov_from_cr(unsigned int cr, unsigned int gpr)
     return X86EMUL_UNHANDLEABLE;
 }
 
+void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
+{
+    if ( value & X86_CR0_CD )
+    {
+        /* Entering no fill cache mode. */
+        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+        v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+
+        if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
+        {
+            domain_pause_nosync(v->domain);
+
+            /* Flush physical caches. */
+            on_each_cpu(local_flush_cache, NULL, 1);
+            hvm_set_uc_mode(v, 1);
+
+            domain_unpause(v->domain);
+        }
+        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+    }
+    else if ( !(value & X86_CR0_CD) &&
+              (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+    {
+        /* Exit from no fill cache mode. */
+        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
+        v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+
+        if ( domain_exit_uc_mode(v) )
+            hvm_set_uc_mode(v, 0);
+
+        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
+    }
+}
+
 int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
@@ -1784,35 +1818,18 @@ int hvm_set_cr0(unsigned long value)
         }
     }
 
-    if ( has_arch_mmios(v->domain) )
-    {
-        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
-        {
-            /* Entering no fill cache mode. */
-            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
-            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
-
-            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
-            {
-                /* Flush physical caches. */
-                on_each_cpu(local_flush_cache, NULL, 1);
-                hvm_set_uc_mode(v, 1);
-            }
-            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
-        }
-        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
-                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
-        {
-            /* Exit from no fill cache mode. */
-            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
-            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
-
-            if ( domain_exit_uc_mode(v) )
-                hvm_set_uc_mode(v, 0);
-
-            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
-        }
-    }
+    /*
+     * When cr0.cd is changed:
+     * 1. For a guest w/o VT-d, and for a guest with VT-d but snooped, Xen
+     * need not do anything, since hardware snooping ensures cache coherency;
+     * 2. For a guest with VT-d but non-snooped, cache coherency cannot be
+     * guaranteed by h/w, so Xen needs to emulate UC memory type to the guest.
+     */
+    if ( ((value ^ old_value) & X86_CR0_CD) &&
+           has_arch_pdevs(v->domain) &&
+           iommu_enabled && !iommu_snoop &&
+           hvm_funcs.handle_cd )
+        hvm_funcs.handle_cd(v, value);
 
     v->arch.hvm_vcpu.guest_cr[0] = value;
     hvm_update_guest_cr(v, 0);
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 6916c6d..7a01b1f 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -921,7 +921,7 @@ static int construct_vmcs(struct vcpu *v)
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R | MSR_TYPE_W);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R | MSR_TYPE_W);
         vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W);
-        if ( paging_mode_hap(d) )
+        if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) )
             vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W);
     }
 
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 6dedb29..d846a9c 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -642,6 +642,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
             __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0);
     }
 
+    /* For guest cr0.cd setting, do not use potentially polluted cache */
+    if ( unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
+        wbinvd();
+
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
 }
@@ -908,7 +912,8 @@ static unsigned long vmx_get_shadow_gs_base(struct vcpu *v)
 
 static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
 {
-    if ( !paging_mode_hap(v->domain) )
+    if ( !paging_mode_hap(v->domain) ||
+         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
         return 0;
 
     vmx_vmcs_enter(v);
@@ -919,7 +924,8 @@ static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
 
 static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
 {
-    if ( !paging_mode_hap(v->domain) )
+    if ( !paging_mode_hap(v->domain) ||
+         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
         return 0;
 
     vmx_vmcs_enter(v);
@@ -928,6 +934,53 @@ static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
     return 1;
 }
 
+static void vmx_handle_cd(struct vcpu *v, unsigned long value)
+{
+    if ( !paging_mode_hap(v->domain) )
+    {
+        /*
+         * For shadow, the 'load IA32_PAT' VM-entry control is 0, so it cannot
+         * set the guest memory type as UC via IA32_PAT. Xen drops all shadows
+         * so that any new ones would be created on demand.
+         */
+        hvm_shadow_handle_cd(v, value);
+    }
+    else
+    {
+        u64 *pat = &v->arch.hvm_vcpu.pat_cr;
+
+        if ( value & X86_CR0_CD )
+        {
+            /*
+             * For EPT, set the guest IA32_PAT fields as UC so that guest
+             * memory types are all UC.
+             */
+            u64 uc_pat =
+                ((uint64_t)PAT_TYPE_UNCACHABLE)       |       /* PAT0 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 8)  |       /* PAT1 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 16) |       /* PAT2 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |       /* PAT3 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 32) |       /* PAT4 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 40) |       /* PAT5 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 48) |       /* PAT6 */
+                ((uint64_t)PAT_TYPE_UNCACHABLE << 56);        /* PAT7 */
+
+            vmx_get_guest_pat(v, pat);
+            vmx_set_guest_pat(v, uc_pat);
+
+            wbinvd();               /* flush possibly polluted cache */
+            hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB */
+            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
+        }
+        else
+        {
+            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
+            vmx_set_guest_pat(v, *pat);
+            hvm_asid_flush_vcpu(v); /* no need to flush cache */
+        }
+    }
+}
+
 static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
 {
     vmx_vmcs_enter(v);
@@ -1550,6 +1603,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
     .msr_read_intercept   = vmx_msr_read_intercept,
     .msr_write_intercept  = vmx_msr_write_intercept,
     .invlpg_intercept     = vmx_invlpg_intercept,
+    .handle_cd            = vmx_handle_cd,
     .set_info_guest       = vmx_set_info_guest,
     .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
     .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 5999779..ce20323 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -825,6 +825,16 @@ void domain_pause(struct domain *d)
         vcpu_sleep_sync(v);
 }
 
+void domain_pause_nosync(struct domain *d)
+{
+    struct vcpu *v;
+
+    atomic_inc(&d->pause_count);
+
+    for_each_vcpu( d, v )
+        vcpu_sleep_nosync(v);
+}
+
 void domain_unpause(struct domain *d)
 {
     struct vcpu *v;
diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
index 8dd2b40..c9afb56 100644
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -156,6 +156,7 @@ struct hvm_function_table {
     int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
     int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
     void (*invlpg_intercept)(unsigned long vaddr);
+    void (*handle_cd)(struct vcpu *v, unsigned long value);
     void (*set_info_guest)(struct vcpu *v);
     void (*set_rdtsc_exiting)(struct vcpu *v, bool_t);
 
diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h
index 7ddc806..52aef1f 100644
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -130,6 +130,8 @@ void hvm_rdtsc_intercept(struct cpu_user_regs *regs);
 
 int __must_check hvm_handle_xsetbv(u32 index, u64 new_bv);
 
+void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value);
+
 /* These functions all return X86EMUL return codes. */
 int hvm_set_efer(uint64_t value);
 int hvm_set_cr0(unsigned long value);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 1765e18..82f522e 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -700,6 +700,7 @@ void vcpu_unblock(struct vcpu *v);
 void vcpu_pause(struct vcpu *v);
 void vcpu_pause_nosync(struct vcpu *v);
 void domain_pause(struct domain *d);
+void domain_pause_nosync(struct domain *d);
 void vcpu_unpause(struct vcpu *v);
 void domain_unpause(struct domain *d);
 void domain_pause_by_systemcontroller(struct domain *d);
-- 
1.7.1
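
(Aside, not part of the posted patch: since the SDM encodes UC as 0x00 --
which is what Xen's PAT_TYPE_UNCACHABLE expands to -- the uc_pat value built
in vmx_handle_cd() above works out to 0. A minimal user-space sketch of the
same construction, for illustration only:)

#include <assert.h>
#include <stdint.h>

#define PAT_TYPE_UNCACHABLE 0x00ULL   /* UC encoding, Intel SDM vol. 3 */

int main(void)
{
    uint64_t uc_pat = 0;
    int i;

    /* Same construction as the uc_pat initializer in vmx_handle_cd(). */
    for ( i = 0; i < 8; i++ )
        uc_pat |= PAT_TYPE_UNCACHABLE << (i * 8);

    assert(uc_pat == 0);   /* all eight PAT fields UC => the MSR value is 0 */
    return 0;
}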


* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-21 15:55 [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling Liu, Jinsong
@ 2013-10-22 14:55 ` Jan Beulich
  2013-10-23  8:48   ` DuanZhenzhong
  2013-10-23 16:29   ` Nakajima, Jun
  2013-10-22 15:26 ` Tim Deegan
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 22+ messages in thread
From: Jan Beulich @ 2013-10-22 14:55 UTC (permalink / raw)
  To: Jinsong Liu
  Cc: Tim Deegan, Keir Fraser, suravee.suthikulpanit, Andrew Cooper,
	Eddie Dong, zhenzhong.duan, xen-devel, Will Auld, Jun Nakajima,
	sherry.hurwitz

>>> On 21.10.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
> From: Liu Jinsong <jinsong.liu@intel.com>
> Date: Thu, 17 Oct 2013 05:49:23 +0800
> Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
> 
> This patch solves XSA-60 security hole:
> 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need
> do nothing, since hardware snoop mechanism has ensured cache coherency.
> 
> 2. For guest with VT-d but non-snooped, cache coherency can not be
> guaranteed by h/w snoop, therefore it need emulate UC type to guest:
> 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so that
> guest memory type are all UC.
> 2.2). if it works w/ shadow, drop all shadows so that any new ones would
> be created on demand w/ UC.
> 
> This patch also fix a bug of shadow cr0.cd setting. Current shadow has a
> small window between cache flush and TLB invalidation, resulting in possilbe
> cache pollution. This patch pause vcpus so that no vcpus context involved
> into the window. 
> 
> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>

This looks fine to me now, but will need acks/reviews at least from
- Keir (whose blessing of the pausing construct I'd like to have even
  if this didn't involve changing non-x86 files)
- one of the VMX maintainers
- one or both of Tim and Andrew

And of course I'd really appreciate it if Oracle could arrange for
testing of this, to confirm that their performance problem is also gone
with it.

Thanks, Jan

> ---
>  xen/arch/x86/hvm/hvm.c            |   75 ++++++++++++++++++++++--------------
>  xen/arch/x86/hvm/vmx/vmcs.c       |    2 +-
>  xen/arch/x86/hvm/vmx/vmx.c        |   58 +++++++++++++++++++++++++++-
>  xen/common/domain.c               |   10 +++++
>  xen/include/asm-x86/hvm/hvm.h     |    1 +
>  xen/include/asm-x86/hvm/support.h |    2 +
>  xen/include/xen/sched.h           |    1 +
>  7 files changed, 117 insertions(+), 32 deletions(-)
> 
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 688a943..df021de 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1701,6 +1701,40 @@ int hvm_mov_from_cr(unsigned int cr, unsigned int gpr)
>      return X86EMUL_UNHANDLEABLE;
>  }
>  
> +void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
> +{
> +    if ( value & X86_CR0_CD )
> +    {
> +        /* Entering no fill cache mode. */
> +        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> +        v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> +
> +        if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
> +        {
> +            domain_pause_nosync(v->domain);
> +
> +            /* Flush physical caches. */
> +            on_each_cpu(local_flush_cache, NULL, 1);
> +            hvm_set_uc_mode(v, 1);
> +
> +            domain_unpause(v->domain);
> +        }
> +        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> +    }
> +    else if ( !(value & X86_CR0_CD) &&
> +              (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
> +    {
> +        /* Exit from no fill cache mode. */
> +        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> +        v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> +
> +        if ( domain_exit_uc_mode(v) )
> +            hvm_set_uc_mode(v, 0);
> +
> +        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> +    }
> +}
> +
>  int hvm_set_cr0(unsigned long value)
>  {
>      struct vcpu *v = current;
> @@ -1784,35 +1818,18 @@ int hvm_set_cr0(unsigned long value)
>          }
>      }
>  
> -    if ( has_arch_mmios(v->domain) )
> -    {
> -        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
> -        {
> -            /* Entering no fill cache mode. */
> -            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> -            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> -
> -            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
> -            {
> -                /* Flush physical caches. */
> -                on_each_cpu(local_flush_cache, NULL, 1);
> -                hvm_set_uc_mode(v, 1);
> -            }
> -            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> -        }
> -        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
> -                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
> -        {
> -            /* Exit from no fill cache mode. */
> -            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> -            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> -
> -            if ( domain_exit_uc_mode(v) )
> -                hvm_set_uc_mode(v, 0);
> -
> -            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> -        }
> -    }
> +    /*
> +     * When cr0.cd setting
> +     * 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need 
> not
> +     * do anything, since hardware snoop mechanism has ensured cache 
> coherency;
> +     * 2. For guest with VT-d but non-snooped, cache coherency cannot be
> +     * guaranteed by h/w so need emulate UC memory type to guest.
> +     */
> +    if ( ((value ^ old_value) & X86_CR0_CD) &&
> +           has_arch_pdevs(v->domain) &&
> +           iommu_enabled && !iommu_snoop &&
> +           hvm_funcs.handle_cd )
> +        hvm_funcs.handle_cd(v, value);
>  
>      v->arch.hvm_vcpu.guest_cr[0] = value;
>      hvm_update_guest_cr(v, 0);
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index 6916c6d..7a01b1f 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -921,7 +921,7 @@ static int construct_vmcs(struct vcpu *v)
>          vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R | 
> MSR_TYPE_W);
>          vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R 
> | MSR_TYPE_W);
>          vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R 
> | MSR_TYPE_W);
> -        if ( paging_mode_hap(d) )
> +        if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) )
>              vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | 
> MSR_TYPE_W);
>      }
>  
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 6dedb29..d846a9c 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -642,6 +642,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
>              __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0);
>      }
>  
> +    /* For guest cr0.cd setting, do not use potentially polluted cache */
> +    if ( unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
> +        wbinvd();
> +
>      vmx_restore_guest_msrs(v);
>      vmx_restore_dr(v);
>  }
> @@ -908,7 +912,8 @@ static unsigned long vmx_get_shadow_gs_base(struct vcpu 
> *v)
>  
>  static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
>  {
> -    if ( !paging_mode_hap(v->domain) )
> +    if ( !paging_mode_hap(v->domain) ||
> +         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
>          return 0;
>  
>      vmx_vmcs_enter(v);
> @@ -919,7 +924,8 @@ static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
>  
>  static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
>  {
> -    if ( !paging_mode_hap(v->domain) )
> +    if ( !paging_mode_hap(v->domain) ||
> +         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
>          return 0;
>  
>      vmx_vmcs_enter(v);
> @@ -928,6 +934,53 @@ static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
>      return 1;
>  }
>  
> +static void vmx_handle_cd(struct vcpu *v, unsigned long value)
> +{
> +    if ( !paging_mode_hap(v->domain) )
> +    {
> +        /*
> +         * For shadow, 'load IA32_PAT' VM-entry control is 0, so it cannot
> +         * set guest memory type as UC via IA32_PAT. Xen drop all shadows
> +         * so that any new ones would be created on demand.
> +         */
> +        hvm_shadow_handle_cd(v, value);
> +    }
> +    else
> +    {
> +        u64 *pat = &v->arch.hvm_vcpu.pat_cr;
> +
> +        if ( value & X86_CR0_CD )
> +        {
> +            /*
> +             * For EPT, set guest IA32_PAT fields as UC so that guest
> +             * memory type are all UC.
> +             */
> +            u64 uc_pat =
> +                ((uint64_t)PAT_TYPE_UNCACHABLE)       |       /* PAT0 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 8)  |       /* PAT1 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 16) |       /* PAT2 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |       /* PAT3 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 32) |       /* PAT4 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 40) |       /* PAT5 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 48) |       /* PAT6 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 56);        /* PAT7 */
> +
> +            vmx_get_guest_pat(v, pat);
> +            vmx_set_guest_pat(v, uc_pat);
> +
> +            wbinvd();               /* flush possibly polluted cache */
> +            hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB 
> */
> +            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> +        }
> +        else
> +        {
> +            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> +            vmx_set_guest_pat(v, *pat);
> +            hvm_asid_flush_vcpu(v); /* no need to flush cache */
> +        }
> +    }
> +}
> +
>  static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
>  {
>      vmx_vmcs_enter(v);
> @@ -1550,6 +1603,7 @@ static struct hvm_function_table __initdata 
> vmx_function_table = {
>      .msr_read_intercept   = vmx_msr_read_intercept,
>      .msr_write_intercept  = vmx_msr_write_intercept,
>      .invlpg_intercept     = vmx_invlpg_intercept,
> +    .handle_cd            = vmx_handle_cd,
>      .set_info_guest       = vmx_set_info_guest,
>      .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
>      .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 5999779..ce20323 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -825,6 +825,16 @@ void domain_pause(struct domain *d)
>          vcpu_sleep_sync(v);
>  }
>  
> +void domain_pause_nosync(struct domain *d)
> +{
> +    struct vcpu *v;
> +
> +    atomic_inc(&d->pause_count);
> +
> +    for_each_vcpu( d, v )
> +        vcpu_sleep_nosync(v);
> +}
> +
>  void domain_unpause(struct domain *d)
>  {
>      struct vcpu *v;
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index 8dd2b40..c9afb56 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -156,6 +156,7 @@ struct hvm_function_table {
>      int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
>      int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
>      void (*invlpg_intercept)(unsigned long vaddr);
> +    void (*handle_cd)(struct vcpu *v, unsigned long value);
>      void (*set_info_guest)(struct vcpu *v);
>      void (*set_rdtsc_exiting)(struct vcpu *v, bool_t);
>  
> diff --git a/xen/include/asm-x86/hvm/support.h 
> b/xen/include/asm-x86/hvm/support.h
> index 7ddc806..52aef1f 100644
> --- a/xen/include/asm-x86/hvm/support.h
> +++ b/xen/include/asm-x86/hvm/support.h
> @@ -130,6 +130,8 @@ void hvm_rdtsc_intercept(struct cpu_user_regs *regs);
>  
>  int __must_check hvm_handle_xsetbv(u32 index, u64 new_bv);
>  
> +void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value);
> +
>  /* These functions all return X86EMUL return codes. */
>  int hvm_set_efer(uint64_t value);
>  int hvm_set_cr0(unsigned long value);
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 1765e18..82f522e 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -700,6 +700,7 @@ void vcpu_unblock(struct vcpu *v);
>  void vcpu_pause(struct vcpu *v);
>  void vcpu_pause_nosync(struct vcpu *v);
>  void domain_pause(struct domain *d);
> +void domain_pause_nosync(struct domain *d);
>  void vcpu_unpause(struct vcpu *v);
>  void domain_unpause(struct domain *d);
>  void domain_pause_by_systemcontroller(struct domain *d);
> -- 
> 1.7.1

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-21 15:55 [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling Liu, Jinsong
  2013-10-22 14:55 ` Jan Beulich
@ 2013-10-22 15:26 ` Tim Deegan
  2013-10-23 10:16   ` Andrew Cooper
  2013-11-04  8:49 ` Zhenzhong Duan
  2013-11-05 21:06 ` Keir Fraser
  3 siblings, 1 reply; 22+ messages in thread
From: Tim Deegan @ 2013-10-22 15:26 UTC (permalink / raw)
  To: Liu, Jinsong
  Cc: Keir Fraser, Jan Beulich, Andrew Cooper, zhenzhong.duan,
	xen-devel, Auld, Will, Nakajima, Jun, sherry.hurwitz,
	suravee.suthikulpanit

At 15:55 +0000 on 21 Oct (1382367312), Liu, Jinsong wrote:
> From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
> From: Liu Jinsong <jinsong.liu@intel.com>
> Date: Thu, 17 Oct 2013 05:49:23 +0800
> Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
> 
> This patch solves XSA-60 security hole:
> 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need
> do nothing, since hardware snoop mechanism has ensured cache coherency.
> 
> 2. For guest with VT-d but non-snooped, cache coherency can not be
> guaranteed by h/w snoop, therefore it need emulate UC type to guest:
> 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so that
> guest memory type are all UC.
> 2.2). if it works w/ shadow, drop all shadows so that any new ones would
> be created on demand w/ UC.
> 
> This patch also fix a bug of shadow cr0.cd setting. Current shadow has a
> small window between cache flush and TLB invalidation, resulting in possilbe
> cache pollution. This patch pause vcpus so that no vcpus context involved
> into the window. 
> 
> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>

Reviewed-by: Tim Deegan <tim@xen.org>

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-22 14:55 ` Jan Beulich
@ 2013-10-23  8:48   ` DuanZhenzhong
  2013-10-23 16:29   ` Nakajima, Jun
  1 sibling, 0 replies; 22+ messages in thread
From: DuanZhenzhong @ 2013-10-23  8:48 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Jinsong Liu, Tim Deegan, Keir Fraser, suravee.suthikulpanit,
	Andrew Cooper, Eddie Dong, xen-devel, Will Auld, Jun Nakajima,
	sherry.hurwitz

Jan Beulich wrote:
>>>> On 21.10.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>>>>         
>> From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
>> From: Liu Jinsong <jinsong.liu@intel.com>
>> Date: Thu, 17 Oct 2013 05:49:23 +0800
>> Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
>>
>> This patch solves XSA-60 security hole:
>> 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need
>> do nothing, since hardware snoop mechanism has ensured cache coherency.
>>
>> 2. For guest with VT-d but non-snooped, cache coherency can not be
>> guaranteed by h/w snoop, therefore it need emulate UC type to guest:
>> 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so that
>> guest memory type are all UC.
>> 2.2). if it works w/ shadow, drop all shadows so that any new ones would
>> be created on demand w/ UC.
>>
>> This patch also fix a bug of shadow cr0.cd setting. Current shadow has a
>> small window between cache flush and TLB invalidation, resulting in possilbe
>> cache pollution. This patch pause vcpus so that no vcpus context involved
>> into the window. 
>>
>> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
>>     
>
> This looks fine to me now, but will need acks/reviews at least from
> - Keir (whose blessing of the pausing construct I'd like to have even
>   if this didn't involve changing non-x86 files)
> - one of the VMX maintainers
> - one or both of Tim and Andrew
>
> And of course I'd really appreciate if Oracle could arrange for
> testing this, to confirm their performance problem is also gone with
> this.
>   
I am trying to find an env to test it. I'll reply after testing.

zduan

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-22 15:26 ` Tim Deegan
@ 2013-10-23 10:16   ` Andrew Cooper
  0 siblings, 0 replies; 22+ messages in thread
From: Andrew Cooper @ 2013-10-23 10:16 UTC (permalink / raw)
  To: Tim Deegan
  Cc: Liu, Jinsong, Keir Fraser, Jan Beulich, zhenzhong.duan,
	xen-devel, Auld, Will, Nakajima, Jun, sherry.hurwitz,
	suravee.suthikulpanit

On 22/10/13 16:26, Tim Deegan wrote:
> At 15:55 +0000 on 21 Oct (1382367312), Liu, Jinsong wrote:
>> From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
>> From: Liu Jinsong <jinsong.liu@intel.com>
>> Date: Thu, 17 Oct 2013 05:49:23 +0800
>> Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
>>
>> This patch solves XSA-60 security hole:
>> 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need
>> do nothing, since hardware snoop mechanism has ensured cache coherency.
>>
>> 2. For guest with VT-d but non-snooped, cache coherency can not be
>> guaranteed by h/w snoop, therefore it need emulate UC type to guest:
>> 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so that
>> guest memory type are all UC.
>> 2.2). if it works w/ shadow, drop all shadows so that any new ones would
>> be created on demand w/ UC.
>>
>> This patch also fix a bug of shadow cr0.cd setting. Current shadow has a
>> small window between cache flush and TLB invalidation, resulting in possilbe
>> cache pollution. This patch pause vcpus so that no vcpus context involved
>> into the window. 
>>
>> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
> Reviewed-by: Tim Deegan <tim@xen.org>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-22 14:55 ` Jan Beulich
  2013-10-23  8:48   ` DuanZhenzhong
@ 2013-10-23 16:29   ` Nakajima, Jun
  2013-10-23 16:38     ` Jan Beulich
  1 sibling, 1 reply; 22+ messages in thread
From: Nakajima, Jun @ 2013-10-23 16:29 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Jinsong Liu, Tim Deegan, Keir Fraser, Andrew Cooper, Eddie Dong,
	zhenzhong.duan, xen-devel, Will Auld, suravee.suthikulpanit,
	sherry.hurwitz


On Tue, Oct 22, 2013 at 7:55 AM, Jan Beulich <JBeulich@suse.com> wrote:

> >>> On 21.10.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> > From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
> > From: Liu Jinsong <jinsong.liu@intel.com>
> > Date: Thu, 17 Oct 2013 05:49:23 +0800
> > Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
> >
> > This patch solves XSA-60 security hole:
> > 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need
> > do nothing, since hardware snoop mechanism has ensured cache coherency.
> >
> > 2. For guest with VT-d but non-snooped, cache coherency can not be
> > guaranteed by h/w snoop, therefore it need emulate UC type to guest:
> > 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so that
> > guest memory type are all UC.
>

Can you make sure that "setting guest IA32_PAT fields as UC" doesn't have a
conflict with the existing (other) settings done by the guest?

-- 
Jun
Intel Open Source Technology Center


* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-23 16:29   ` Nakajima, Jun
@ 2013-10-23 16:38     ` Jan Beulich
  2013-10-24 16:19       ` Liu, Jinsong
  0 siblings, 1 reply; 22+ messages in thread
From: Jan Beulich @ 2013-10-23 16:38 UTC (permalink / raw)
  To: jun.nakajima
  Cc: jinsong.liu, tim, keir, andrew.cooper3, eddie.dong,
	zhenzhong.duan, xen-devel, will.auld, suravee.suthikulpanit,
	sherry.hurwitz

>>> "Nakajima, Jun" <jun.nakajima@intel.com> 10/23/13 6:29 PM >>>
>On Tue, Oct 22, 2013 at 7:55 AM, Jan Beulich <JBeulich@suse.com> wrote:
>> >>> On 21.10.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> > From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
>> > From: Liu Jinsong <jinsong.liu@intel.com>
>> > Date: Thu, 17 Oct 2013 05:49:23 +0800
>> > Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
>> >
>> > This patch solves XSA-60 security hole:
>> > 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need
>> > do nothing, since hardware snoop mechanism has ensured cache coherency.
>> >
>> > 2. For guest with VT-d but non-snooped, cache coherency can not be
>> > guaranteed by h/w snoop, therefore it need emulate UC type to guest:
>> > 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so that
>> > guest memory type are all UC.
>
>Can you make sure that "setting guest IA32_PAT fields as UC" doesn't have a
>conflict with the existing (other) settings done by the guest?

I don't think I understand the question, and I also don't think I'm the right
addressee (I think you meant to send this to Jinsong and only Cc me).

Jan

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-23 16:38     ` Jan Beulich
@ 2013-10-24 16:19       ` Liu, Jinsong
  2013-10-24 16:39         ` Liu, Jinsong
  0 siblings, 1 reply; 22+ messages in thread
From: Liu, Jinsong @ 2013-10-24 16:19 UTC (permalink / raw)
  To: Jan Beulich, Nakajima, Jun
  Cc: tim, keir, andrew.cooper3, Dong, Eddie, zhenzhong.duan,
	xen-devel, Auld, Will, suravee.suthikulpanit, sherry.hurwitz

Jan Beulich wrote:
>>>> "Nakajima, Jun" <jun.nakajima@intel.com> 10/23/13 6:29 PM >>>
>> On Tue, Oct 22, 2013 at 7:55 AM, Jan Beulich <JBeulich@suse.com>
>> wrote: 
>>>>>> On 21.10.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com>
>>>>>> wrote: 
>>>> From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00
>>>> 2001 From: Liu Jinsong <jinsong.liu@intel.com>
>>>> Date: Thu, 17 Oct 2013 05:49:23 +0800
>>>> Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
>>>> 
>>>> This patch solves XSA-60 security hole:
>>>> 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen
>>>> need do nothing, since hardware snoop mechanism has ensured cache
>>>> coherency. 
>>>> 
>>>> 2. For guest with VT-d but non-snooped, cache coherency can not be
>>>> guaranteed by h/w snoop, therefore it need emulate UC type to
>>>> guest: 
>>>> 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so
>>>> that guest memory type are all UC.
>> 
>> Can you make sure that "setting guest IA32_PAT fields as UC" doesn't
>> have a conflict with the existing (other) settings done by the guest?
> 
> I don't think I understand the question, and I also don't think I'm
> the right addressee (I think you meant to send this to Jinsong and
> only Cc me). 
> 
> Jan

Maybe Jun's concern is 'guest PAT (the real pat of the vmcs which takes
effect, not the nominal guest_pat) should be identical among all physical
processors which run vcpus of that guest' -- am I right, Jun?

One thing I'm not sure about is that, per the Intel SDM (8.7.4 of volume 3),
the PAT MSR settings must be the same for all processors in a system. However,
Xen obviously doesn't satisfy this requirement: the PAT of the cpus running
vmm context (50100070406) is not identical to the PAT of the cpus running
guest context (take a rhel6.4 guest as an example: it's 7010600070106) --
yet practically it works fine.
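For reference, a minimal user-space sketch (not Xen code; the type names
follow the SDM encodings in vol. 3, table 11-10) that decodes such PAT MSR
values field by field -- the two values quoted above are illustrative inputs:

#include <stdio.h>
#include <stdint.h>

/* PAT memory type encodings, Intel SDM vol. 3, table 11-10. */
static const char *pat_type(uint8_t t)
{
    switch ( t )
    {
    case 0x00: return "UC";
    case 0x01: return "WC";
    case 0x04: return "WT";
    case 0x05: return "WP";
    case 0x06: return "WB";
    case 0x07: return "UC-";
    default:   return "rsvd";
    }
}

static void decode_pat(const char *label, uint64_t pat)
{
    int i;

    printf("%-6s %#018llx:", label, (unsigned long long)pat);
    for ( i = 0; i < 8; i++ )
        printf(" PAT%d=%s", i, pat_type((pat >> (i * 8)) & 0xff));
    printf("\n");
}

int main(void)
{
    decode_pat("vmm", 0x050100070406ULL);     /* value quoted above */
    decode_pat("guest", 0x7010600070106ULL);  /* rhel6.4 example above */
    return 0;
}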


Thanks,
Jinsong

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-24 16:19       ` Liu, Jinsong
@ 2013-10-24 16:39         ` Liu, Jinsong
  2013-10-28  7:29           ` Jan Beulich
  0 siblings, 1 reply; 22+ messages in thread
From: Liu, Jinsong @ 2013-10-24 16:39 UTC (permalink / raw)
  To: Jan Beulich, Nakajima, Jun
  Cc: keir, Dong, Eddie, sherry.hurwitz, tim, zhenzhong.duan,
	xen-devel, Auld, Will, suravee.suthikulpanit, andrew.cooper3

Liu, Jinsong wrote:
> Jan Beulich wrote:
>>>>> "Nakajima, Jun" <jun.nakajima@intel.com> 10/23/13 6:29 PM >>>
>>> On Tue, Oct 22, 2013 at 7:55 AM, Jan Beulich <JBeulich@suse.com>
>>> wrote:
>>>>>>> On 21.10.13 at 17:55, "Liu, Jinsong" <jinsong.liu@intel.com>
>>>>>>> wrote:
>>>>> From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00
>>>>> 2001 From: Liu Jinsong <jinsong.liu@intel.com>
>>>>> Date: Thu, 17 Oct 2013 05:49:23 +0800
>>>>> Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
>>>>> 
>>>>> This patch solves XSA-60 security hole:
>>>>> 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen
>>>>> need do nothing, since hardware snoop mechanism has ensured cache
>>>>> coherency. 
>>>>> 
>>>>> 2. For guest with VT-d but non-snooped, cache coherency can not be
>>>>> guaranteed by h/w snoop, therefore it need emulate UC type to
>>>>> guest: 
>>>>> 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so
>>>>> that guest memory type are all UC.
>>> 
>>> Can you make sure that "setting guest IA32_PAT fields as UC" doesn't
>>> have a conflict with the existing (other) settings done by the
>>> guest? 
>> 
>> I don't think I understand the question, and I also don't think I'm
>> the right addressee (I think you meant to send this to Jinsong and
>> only Cc me). 
>> 
>> Jan
> 
> Maybe Jun's concern is 'guest PAT (real pat of vmcs which take
> effect, not nominal guest_pat) should be identical among all physical
> processors which run vcpus of that guest', am I right, Jun?  
> 
> One thing I'm not sure is, per Intel SDM (8.7.4 of volume 3), the PAT
> MSR settings must be the same for all processors in a system.
> However, Xen obviously doesn't satisfy this requirement: PAT of the
> cpus running vmm context (50100070406) is not identical to PAT of the
> cpus running guest context (take rhel6.4 guest as example, it's
> 7010600070106) -- practically it works fine.

Or, would the PAT requirement under virtualization better be 'PAT MSR settings must be the same for all processors of a domain (taking the vmm as a special domain)'? Otherwise the IA32_PAT field of the vmcs is pointless.

Anyway, we'd better change our patch from per-vcpu PAT emulation to per-domain PAT emulation. Does that make sense, Jun?

Thanks,
Jinsong

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-24 16:39         ` Liu, Jinsong
@ 2013-10-28  7:29           ` Jan Beulich
  2013-10-28  8:31             ` Liu, Jinsong
  0 siblings, 1 reply; 22+ messages in thread
From: Jan Beulich @ 2013-10-28  7:29 UTC (permalink / raw)
  To: Jinsong Liu, Jun Nakajima
  Cc: tim, keir, andrew.cooper3, Eddie Dong, zhenzhong.duan, xen-devel,
	Will Auld, suravee.suthikulpanit, sherry.hurwitz

>>> On 24.10.13 at 18:39, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> Liu, Jinsong wrote:
>> Maybe Jun's concern is 'guest PAT (real pat of vmcs which take
>> effect, not nominal guest_pat) should be identical among all physical
>> processors which run vcpus of that guest', am I right, Jun?  
>> 
>> One thing I'm not sure is, per Intel SDM (8.7.4 of volume 3), the PAT
>> MSR settings must be the same for all processors in a system.
>> However, Xen obviously doesn't satisfy this requirement: PAT of the
>> cpus running vmm context (50100070406) is not identical to PAT of the
>> cpus running guest context (take rhel6.4 guest as example, it's
>> 7010600070106) -- practically it works fine.
> 
> Or, PAT requirement under virtualization would better be 'PAT MSR settings 
> must be the same for all processors of a domain (take vmm as a special 
> domain)'? otherwise IA32_PAT fields of vmcs is pointless.
> 
> Anyway, we'd better change our patch from per-vcpu PAT emulation to per-domain 
> PAT emulation. Does it make sense, Jun?

I don't think that'd be in line with what we currently do, or with how
real hardware works. Unless inconsistencies between PAT settings
can be leveraged to affect the hypervisor or other guests, we
should allow the guest to have them inconsistent (as would be the
natural thing transiently when switching to a new value on all CPUs).
And if inconsistencies can have effects outside the VM, then afaict
we have this issue already without this patch.

While mentally going through this logic again I noticed, however,
that the cache flushing your patch is doing is still insufficient: Doing
this just when CD gets set and in the context switch path is not
enough. This needs to be done prior to each VM entry, unless it
can be proven that the hypervisor (or the service domain) didn't
touch guest memory.

Jan

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-28  7:29           ` Jan Beulich
@ 2013-10-28  8:31             ` Liu, Jinsong
  2013-10-28  9:29               ` Jan Beulich
  0 siblings, 1 reply; 22+ messages in thread
From: Liu, Jinsong @ 2013-10-28  8:31 UTC (permalink / raw)
  To: Jan Beulich, Nakajima, Jun
  Cc: tim, keir, andrew.cooper3, Dong, Eddie, zhenzhong.duan,
	xen-devel, Auld, Will, suravee.suthikulpanit, sherry.hurwitz

Jan Beulich wrote:
>>>> On 24.10.13 at 18:39, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> Liu, Jinsong wrote:
>>> Maybe Jun's concern is 'guest PAT (real pat of vmcs which take
>>> effect, not nominal guest_pat) should be identical among all
>>> physical processors which run vcpus of that guest', am I right, Jun?
>>> 
>>> One thing I'm not sure is, per Intel SDM (8.7.4 of volume 3), the
>>> PAT MSR settings must be the same for all processors in a system.
>>> However, Xen obviously doesn't satisfy this requirement: PAT of the
>>> cpus running vmm context (50100070406) is not identical to PAT of
>>> the cpus running guest context (take rhel6.4 guest as example, it's
>>> 7010600070106) -- practically it works fine.
>> 
>> Or, PAT requirement under virtualization would better be 'PAT MSR
>> settings must be the same for all processors of a domain (take vmm
>> as a special domain)'? otherwise IA32_PAT fields of vmcs is
>> pointless. 
>> 
>> Anyway, we'd better change our patch from per-vcpu PAT emulation to
>> per-domain PAT emulation. Does it make sense, Jun?
> 
> I don't think that's be in line with what we currently do, or with how
> real hardware works. Unless inconsistencies between PAT settings
> can be leveraged to affect the hypervisor or other guests, we
> should allow the guest to have them inconsistent (as would be the
> natural thing transiently when switching to a new value on all CPUs).
> And if inconsistencies can have effects outside the VM, then afaict
> we have this issue already without this patch.

Agree, let's keep current per-vcpu PAT emulation.

> 
> While mentally going through this logic again I noticed, however,
> that the cache flushing your patch is doing is still insufficient:
> Doing this just when CD gets set and in the context switch path is not
> enough. This needs to be done prior to each VM entry, unless it
> can be proven that the hypervisor (or the service domain) didn't
> touch guest memory.
> 

I think it's safe: it only needs to guarantee that no vcpu guest context is involved in the small window between cache flushing and TLB invalidation -- after that it doesn't matter whether the hypervisor touches guest memory or not, since the cache is clear and the old memory type in the TLB is invalidated (and is UC afterwards), so no cache line will be polluted by guest context any more.

Thanks,
Jinsong

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-28  8:31             ` Liu, Jinsong
@ 2013-10-28  9:29               ` Jan Beulich
  2013-10-29 16:52                 ` Liu, Jinsong
  0 siblings, 1 reply; 22+ messages in thread
From: Jan Beulich @ 2013-10-28  9:29 UTC (permalink / raw)
  To: Jinsong Liu, Jun Nakajima
  Cc: tim, keir, andrew.cooper3, Eddie Dong, zhenzhong.duan, xen-devel,
	Will Auld, suravee.suthikulpanit, sherry.hurwitz

>>> On 28.10.13 at 09:31, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> Jan Beulich wrote:
>> While mentally going through this logic again I noticed, however,
>> that the cache flushing your patch is doing is still insufficient:
>> Doing this just when CD gets set and in the context switch path is not
>> enough. This needs to be done prior to each VM entry, unless it
>> can be proven that the hypervisor (or the service domain) didn't
>> touch guest memory.
> 
> I think it's safe: we only need to guarantee that no vcpu runs guest 
> context inside the small window between the cache flush and the TLB 
> invalidation -- after that it doesn't matter whether the hypervisor 
> touches guest memory or not, since the cache is clean and the old memory 
> type in the TLB has been invalidated (and is UC afterwards), so no 
> cache line can be polluted by guest context any more.

No - consider a VM exit while in this mode where, in order to process
it, the hypervisor or service domain touches guest memory. Such
touching will happen with caches being used, and hence unwritten
data may be left in the caches when exiting back to the guest when
there's no wbinvd on the VM entry path. I don't think anything is
being said explicitly anywhere on whether cache contents are being
taken into consideration when CD=0 but PAT enforces UC.
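
Concretely, such a flush would sit on the VM entry path -- a minimal,
hypothetical sketch (vmx_vmenter_helper() is the spot the test in the
follow-up instruments):

    /* Just before resuming the guest, e.g. in vmx_vmenter_helper(): */
    if ( unlikely(current->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
        wbinvd();   /* write back whatever Xen or dom0 may have dirtied */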

Jan

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-28  9:29               ` Jan Beulich
@ 2013-10-29 16:52                 ` Liu, Jinsong
  2013-10-29 17:20                   ` Andrew Cooper
  2013-10-30  8:05                   ` Jan Beulich
  0 siblings, 2 replies; 22+ messages in thread
From: Liu, Jinsong @ 2013-10-29 16:52 UTC (permalink / raw)
  To: Jan Beulich, Nakajima, Jun
  Cc: tim, keir, andrew.cooper3, Dong, Eddie, zhenzhong.duan,
	xen-devel, Auld, Will, suravee.suthikulpanit, sherry.hurwitz

Jan Beulich wrote:
>>>> On 28.10.13 at 09:31, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> Jan Beulich wrote:
>>> While mentally going through this logic again I noticed, however,
>>> that the cache flushing your patch is doing is still insufficient:
>>> Doing this just when CD gets set and in the context switch path is
>>> not enough. This needs to be done prior to each VM entry, unless it
>>> can be proven that the hypervisor (or the service domain) didn't
>>> touch guest memory.
>> 
>> I think it's safe: we only need to guarantee that no vcpu runs guest
>> context inside the small window between the cache flush and the TLB
>> invalidation -- after that it doesn't matter whether the hypervisor
>> touches guest memory or not, since the cache is clean and the old
>> memory type in the TLB has been invalidated (and is UC afterwards),
>> so no cache line can be polluted by guest context any more.
> 
> No - consider a VM exit while in this mode where, in order to process
> it, the hypervisor or service domain touches guest memory. Such
> touching will happen with caches being used, and hence unwritten
> data may be left in the caches when exiting back to the guest when
> there's no wbinvd on the VM entry path. I don't think anything is
> being said explicitly anywhere on whether cache contents are being
> taken into consideration when CD=0 but PAT enforces UC.
> 

I think we don't need to worry about the hypervisor touching guest memory while the guest is in UC mode. I drafted 2 tests, monitoring various events during the guest UC period.

Test 1 adds wbinvd before vmentry. While booting, the guest hangs for 10s ~ 60s at the vcpu0 UC stage, with bunches of vmexits caused by a lapic timer interrupt storm.
Test 2 is our current patch. It shows that during guest UC, vmexits are only triggered by interrupt / CR access / MSR access / wbinvd. With these vmexits the hypervisor will not touch any guest memory.

In test 1 the lapic timer interrupt storm is triggered by the 'wbinvd' (without it everything works fine; I didn't dig out the reason -- maybe wbinvd interacts with some tricky vmentry microcode?), but anyway, test 2 demonstrates that the hypervisor will not touch guest memory during the guest UC period.
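
The counters in the logs below come from debug instrumentation along
these lines (a sketch -- only the counter names are taken from the
output; the array and hook placement are assumptions):

    /* Hypothetical debug counters, bumped while the guest is in no-fill mode. */
    static unsigned long uc_vmenter_count, uc_vmexit_count;
    static unsigned long uc_exit_reason_count[64];

    /* In vmx_vmexit_handler(), once exit_reason has been read from the VMCS: */
    if ( current->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE )
    {
        uc_vmexit_count++;
        uc_exit_reason_count[exit_reason & 0xffff]++;
    }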

Tests log attached below:
=========================================================
Test 1: add wbinvd before vmentry (at vmx_vmenter_helper())
(XEN) uc_vmenter_count = 10607
(XEN) uc_vmexit_count = 10607
(XEN) EXIT-REASON      COUNT
(XEN)        1               10463       // EXIT_REASON_EXTERNAL_INTERRUPT
(XEN)       28                   10       // EXIT_REASON_CR_ACCESS
(XEN)       31                 114       // EXIT_REASON_MSR_READ
(XEN)       32                   15       // EXIT_REASON_MSR_WRITE
(XEN)       54                     5       // EXIT_REASON_WBINVD
(XEN) TOTAL EXIT-REASON-COUNT = 10607
(XEN)
(XEN) vcpu[0] vmentry count = 10492
(XEN) vcpu[1] vmentry count = 37
(XEN) vcpu[2] vmentry count = 40
(XEN) vcpu[3] vmentry count = 38
(XEN) interrupt vec 0xfa occurs 10450 times  // lapic timer
(XEN) interrupt vec 0xfb occurs 13 times       // call function IPI


Test 2: current patch which didn't add wbinvd before vmentry
(XEN) uc_vmenter_count = 147
(XEN) uc_vmexit_count = 147
(XEN) EXIT-REASON      COUNT
(XEN)        1                      3          // EXIT_REASON_EXTERNAL_INTERRUPT
(XEN)       28                    10         // EXIT_REASON_CR_ACCESS
(XEN)       31                  114         // EXIT_REASON_MSR_READ
(XEN)       32                    15         // EXIT_REASON_MSR_WRITE
(XEN)       54                      5         // EXIT_REASON_WBINVD
(XEN) TOTAL EXIT-REASON-COUNT = 147
(XEN)
(XEN) vcpu[0] vmentry count = 45
(XEN) vcpu[1] vmentry count = 34
(XEN) vcpu[2] vmentry count = 34
(XEN) vcpu[3] vmentry count = 34
(XEN) interrupt vec 0xfa occurs 3 times  // lapic timer
==================================================================

Thanks,
Jinsong

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-29 16:52                 ` Liu, Jinsong
@ 2013-10-29 17:20                   ` Andrew Cooper
  2013-10-30 15:21                     ` Liu, Jinsong
  2013-10-30  8:05                   ` Jan Beulich
  1 sibling, 1 reply; 22+ messages in thread
From: Andrew Cooper @ 2013-10-29 17:20 UTC (permalink / raw)
  To: Liu, Jinsong
  Cc: keir, Nakajima, Jun, tim, Dong, Eddie, zhenzhong.duan, xen-devel,
	Auld, Will, Jan Beulich, suravee.suthikulpanit, sherry.hurwitz

On 29/10/13 16:52, Liu, Jinsong wrote:
> Jan Beulich wrote:
>>>>> On 28.10.13 at 09:31, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>>> Jan Beulich wrote:
>>>> While mentally going through this logic again I noticed, however,
>>>> that the cache flushing your patch is doing is still insufficient:
>>>> Doing this just when CD gets set and in the context switch path is
>>>> not enough. This needs to be done prior to each VM entry, unless it
>>>> can be proven that the hypervisor (or the service domain) didn't
>>>> touch guest memory.
>>> I think it's safe: we only need to guarantee that no vcpu runs
>>> guest context inside the small window between the cache flush and
>>> the TLB invalidation -- after that it doesn't matter whether the
>>> hypervisor touches guest memory or not, since the cache is clean
>>> and the old memory type in the TLB has been invalidated (and is UC
>>> afterwards), so no cache line can be polluted by guest context any
>>> more.
>> No - consider a VM exit while in this mode where, in order to process
>> it, the hypervisor or service domain touches guest memory. Such
>> touching will happen with caches being used, and hence unwritten
>> data may be left in the caches when exiting back to the guest when
>> there's no wbinvd on the VM entry path. I don't think anything is
>> being said explicitly anywhere on whether cache contents are being
>> taken into consideration when CD=0 but PAT enforces UC.
>>
> I think we don't need to worry about the hypervisor touching guest memory while the guest is in UC mode. I drafted 2 tests, monitoring various events during the guest UC period.
>
> Test 1 adds wbinvd before vmentry. While booting, the guest hangs for 10s ~ 60s at the vcpu0 UC stage, with bunches of vmexits caused by a lapic timer interrupt storm.
> Test 2 is our current patch. It shows that during guest UC, vmexits are only triggered by interrupt / CR access / MSR access / wbinvd. With these vmexits the hypervisor will not touch any guest memory.
>
> In test 1 the lapic timer interrupt storm is triggered by the 'wbinvd' (without it everything works fine; I didn't dig out the reason -- maybe wbinvd interacts with some tricky vmentry microcode?), but anyway, test 2 demonstrates that the hypervisor will not touch guest memory during the guest UC period.

How about this:

1) Clear a page with ud2s
2) Use the hypervisor msr to write a new hypercall page over this
cleared page
3) Immediately try to make a hypercall using this new page

What guarantee is there that Xen writing the hypercall page has made its
way correctly back to RAM by the time domU tries to execute the hypercall?
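
Roughly, from the guest side -- a sketch with Linux-style helpers, where
hypercall_via() is made up (the hypercall page MSR index really is
advertised in EBX of CPUID leaf base+2 on Xen):

    static void hypercall_page_race_test(uint8_t *page)
    {
        unsigned int i;
        uint32_t base = xen_cpuid_base();     /* finds the 0x400000xx Xen leaf */
        uint32_t msr  = cpuid_ebx(base + 2);  /* hypercall page MSR index */

        for ( i = 0; i < PAGE_SIZE; i += 2 )  /* 1) fill the page with ud2 (0f 0b) */
        {
            page[i] = 0x0f;
            page[i + 1] = 0x0b;
        }
        wrmsrl(msr, virt_to_phys(page));      /* 2) Xen rewrites it with stubs */
        hypercall_via(page, __HYPERVISOR_xen_version); /* 3) use it at once */
    }

If Xen's stub writes are still sitting only in cache, the guest's UC
instruction fetch in step 3 reads stale ud2 bytes from RAM and faults.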

~Andrew

>
> Tests log attached below:
> =========================================================
> Test 1: add wbinvd before vmentry (at vmx_vmenter_helper())
> (XEN) uc_vmenter_count = 10607
> (XEN) uc_vmexit_count = 10607
> (XEN) EXIT-REASON      COUNT
> (XEN)        1               10463       // EXIT_REASON_EXTERNAL_INTERRUPT
> (XEN)       28                   10       // EXIT_REASON_CR_ACCESS
> (XEN)       31                 114       // EXIT_REASON_MSR_READ
> (XEN)       32                   15       // EXIT_REASON_MSR_WRITE
> (XEN)       54                     5       // EXIT_REASON_WBINVD
> (XEN) TOTAL EXIT-REASON-COUNT = 10607
> (XEN)
> (XEN) vcpu[0] vmentry count = 10492
> (XEN) vcpu[1] vmentry count = 37
> (XEN) vcpu[2] vmentry count = 40
> (XEN) vcpu[3] vmentry count = 38
> (XEN) interrupt vec 0xfa occurs 10450 times  // lapic timer
> (XEN) interrupt vec 0xfb occurs 13 times       // call function IPI
>
>
> Test 2: current patch which didn't add wbinvd before vmentry
> (XEN) uc_vmenter_count = 147
> (XEN) uc_vmexit_count = 147
> (XEN) EXIT-REASON      COUNT
> (XEN)        1                      3          // EXIT_REASON_EXTERNAL_INTERRUPT
> (XEN)       28                    10         // EXIT_REASON_CR_ACCESS
> (XEN)       31                  114         // EXIT_REASON_MSR_READ
> (XEN)       32                    15         // EXIT_REASON_MSR_WRITE
> (XEN)       54                      5         // EXIT_REASON_WBINVD
> (XEN) TOTAL EXIT-REASON-COUNT = 147
> (XEN)
> (XEN) vcpu[0] vmentry count = 45
> (XEN) vcpu[1] vmentry count = 34
> (XEN) vcpu[2] vmentry count = 34
> (XEN) vcpu[3] vmentry count = 34
> (XEN) interrupt vec 0xfa occurs 3 times  // lapic timer
> ==================================================================
>
> Thanks,
> Jinsong

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-29 16:52                 ` Liu, Jinsong
  2013-10-29 17:20                   ` Andrew Cooper
@ 2013-10-30  8:05                   ` Jan Beulich
  2013-10-30 15:41                     ` Liu, Jinsong
  1 sibling, 1 reply; 22+ messages in thread
From: Jan Beulich @ 2013-10-30  8:05 UTC (permalink / raw)
  To: Jinsong Liu, Jun Nakajima
  Cc: tim, keir, andrew.cooper3, Eddie Dong, zhenzhong.duan, xen-devel,
	Will Auld, suravee.suthikulpanit, sherry.hurwitz

>>> On 29.10.13 at 17:52, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> Jan Beulich wrote:
>> No - consider a VM exit while in this mode where, in order to process
>> it, the hypervisor or service domain touches guest memory. Such
>> touching will happen with caches being used, and hence unwritten
>> data may be left in the caches when exiting back to the guest when
>> there's no wbinvd on the VM entry path. I don't think anything is
>> being said explicitly anywhere on whether cache contents are being
>> taken into consideration when CD=0 but PAT enforces UC.
>> 
> 
> I think we don't need to worry about the hypervisor touching guest 
> memory while the guest is in UC mode. I drafted 2 tests, monitoring 
> various events during the guest UC period.
> 
> Test 1 adds wbinvd before vmentry. While booting, the guest hangs for 
> 10s ~ 60s at the vcpu0 UC stage, with bunches of vmexits caused by a 
> lapic timer interrupt storm.
> Test 2 is our current patch. It shows that during guest UC, vmexits 
> are only triggered by interrupt / CR access / MSR access / wbinvd. 
> With these vmexits the hypervisor will not touch any guest memory.
> 
> In test 1 the lapic timer interrupt storm is triggered by the 'wbinvd' 
> (without it everything works fine; I didn't dig out the reason -- 
> maybe wbinvd interacts with some tricky vmentry microcode?), but 
> anyway, test 2 demonstrates that the hypervisor will not touch guest 
> memory during the guest UC period.

That storm is a problem, indeed, but what you did isn't meaningful
in any way: You just sampled a specific (presumably Linux) guest
case, but code like this needs to cover the general case (i.e. without
us knowing whether or when the guest might - temporarily or
permanently - suppress caching).

So saying that in this specific case guest memory isn't being touched
doesn't allow us to draw any conclusions other than that perhaps we
need to monitor whether guest memory is being accessed (by setting
a flag in the HVM copy/clear routines), and gate the invocation of
wbinvd based on that. It's clear though that this would still not cover
speculative reads.
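
Something along these lines, perhaps (the flag name and exact hooks are
assumptions; test_and_clear_bool() is an existing Xen primitive):

    /* In __hvm_copy() / clear paths, whenever guest memory is written: */
    curr->domain->arch.hvm_domain.guest_mem_dirty = 1;  /* hypothetical bool_t */

    /* On the VM entry path, gating the flush: */
    if ( unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) &&
         test_and_clear_bool(v->domain->arch.hvm_domain.guest_mem_dirty) )
        wbinvd();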

Jan

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-29 17:20                   ` Andrew Cooper
@ 2013-10-30 15:21                     ` Liu, Jinsong
  2013-10-30 15:27                       ` Jan Beulich
  0 siblings, 1 reply; 22+ messages in thread
From: Liu, Jinsong @ 2013-10-30 15:21 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: keir, Nakajima, Jun, tim, Dong, Eddie, zhenzhong.duan, xen-devel,
	Auld, Will, Jan Beulich, suravee.suthikulpanit, sherry.hurwitz

Andrew Cooper wrote:
> On 29/10/13 16:52, Liu, Jinsong wrote:
>> Jan Beulich wrote:
>>>>>> On 28.10.13 at 09:31, "Liu, Jinsong" <jinsong.liu@intel.com>
>>>>>> wrote: 
>>>> Jan Beulich wrote:
>>>>> While mentally going through this logic again I noticed, however,
>>>>> that the cache flushing your patch is doing is still insufficient:
>>>>> Doing this just when CD gets set and in the context switch path is
>>>>> not enough. This needs to be done prior to each VM entry, unless
>>>>> it can be proven that the hypervisor (or the service domain)
>>>>> didn't touch guest memory.
>>>> I think it's safe: we only need to guarantee that no vcpu runs
>>>> guest context inside the small window between the cache flush and
>>>> the TLB invalidation -- after that it doesn't matter whether the
>>>> hypervisor touches guest memory or not, since the cache is clean
>>>> and the old memory type in the TLB has been invalidated (and is UC
>>>> afterwards), so no cache line can be polluted by guest context any
>>>> more.
>>> No - consider a VM exit while in this mode where, in order to
>>> process it, the hypervisor or service domain touches guest memory.
>>> Such touching will happen with caches being used, and hence
>>> unwritten data may be left in the caches when exiting back to the
>>> guest when there's no wbinvd on the VM entry path. I don't think
>>> anything is being said explicitly anywhere on whether cache
>>> contents are being taken into consideration when CD=0 but PAT
>>> enforces UC. 
>>> 
>> I think we don't need to worry about the hypervisor touching guest
>> memory while the guest is in UC mode. I drafted 2 tests, monitoring
>> various events during the guest UC period.
>> 
>> Test 1 adds wbinvd before vmentry. While booting, the guest hangs
>> for 10s ~ 60s at the vcpu0 UC stage, with bunches of vmexits caused
>> by a lapic timer interrupt storm.
>> Test 2 is our current patch. It shows that during guest UC, vmexits
>> are only triggered by interrupt / CR access / MSR access / wbinvd.
>> With these vmexits the hypervisor will not touch any guest memory.
>> 
>> In test 1 the lapic timer interrupt storm is triggered by the
>> 'wbinvd' (without it everything works fine; I didn't dig out the
>> reason -- maybe wbinvd interacts with some tricky vmentry
>> microcode?), but anyway, test 2 demonstrates that the hypervisor
>> will not touch guest memory during the guest UC period.
> 
> How about this:
> 
> 1) Clear a page with ud2s
> 2) Use the hypervisor msr to write a new hypercall page over this
> cleared page
> 3) Immediately try to make a hypercall using this new page
> 
> What guarantee is there that Xen writing the hypercall page has made
> its way correctly back to RAM by the time domU tries to execute the
> hypercall? 
> 
> ~Andrew

Sorry, it seems I didn't understand it?

Thanks,
Jinsong

> 
>> 
>> Tests log attached below:
>> =========================================================
>> Test 1: add wbinvd before vmentry (at vmx_vmenter_helper())
>> (XEN) uc_vmenter_count = 10607
>> (XEN) uc_vmexit_count = 10607
>> (XEN) EXIT-REASON      COUNT
>> (XEN)        1               10463       // EXIT_REASON_EXTERNAL_INTERRUPT
>> (XEN)       28                   10       // EXIT_REASON_CR_ACCESS
>> (XEN)       31                 114       // EXIT_REASON_MSR_READ
>> (XEN)       32                   15       // EXIT_REASON_MSR_WRITE
>> (XEN)       54                     5       // EXIT_REASON_WBINVD
>> (XEN) TOTAL EXIT-REASON-COUNT = 10607
>> (XEN)
>> (XEN) vcpu[0] vmentry count = 10492
>> (XEN) vcpu[1] vmentry count = 37
>> (XEN) vcpu[2] vmentry count = 40
>> (XEN) vcpu[3] vmentry count = 38
>> (XEN) interrupt vec 0xfa occurs 10450 times  // lapic timer
>> (XEN) interrupt vec 0xfb occurs 13 times       // call function IPI
>> 
>> 
>> Test 2: current patch which didn't add wbinvd before vmentry
>> (XEN) uc_vmenter_count = 147
>> (XEN) uc_vmexit_count = 147
>> (XEN) EXIT-REASON      COUNT
>> (XEN)        1                      3          // EXIT_REASON_EXTERNAL_INTERRUPT
>> (XEN)       28                    10         // EXIT_REASON_CR_ACCESS
>> (XEN)       31                  114         // EXIT_REASON_MSR_READ
>> (XEN)       32                    15         // EXIT_REASON_MSR_WRITE
>> (XEN)       54                      5         // EXIT_REASON_WBINVD
>> (XEN) TOTAL EXIT-REASON-COUNT = 147
>> (XEN)
>> (XEN) vcpu[0] vmentry count = 45
>> (XEN) vcpu[1] vmentry count = 34
>> (XEN) vcpu[2] vmentry count = 34
>> (XEN) vcpu[3] vmentry count = 34
>> (XEN) interrupt vec 0xfa occurs 3 times  // lapic timer
>> ==================================================================
>> 
>> Thanks,
>> Jinsong

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-30 15:21                     ` Liu, Jinsong
@ 2013-10-30 15:27                       ` Jan Beulich
  0 siblings, 0 replies; 22+ messages in thread
From: Jan Beulich @ 2013-10-30 15:27 UTC (permalink / raw)
  To: Jinsong Liu
  Cc: tim, keir, suravee.suthikulpanit, Andrew Cooper, Eddie Dong,
	zhenzhong.duan, xen-devel, Will Auld, Jun Nakajima,
	sherry.hurwitz

>>> On 30.10.13 at 16:21, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
> Andrew Cooper wrote:
>> How about this:
>> 
>> 1) Clear a page with ud2s
>> 2) Use the hypervisor msr to write a new hypercall page over this
>> cleared page
>> 3) Immediately try to make a hypercall using this new page
>> 
>> What guarantee is there that Xen writing the hypercall page has made
>> its way correctly back to RAM by the time domU tries to execute the
>> hypercall? 
> 
> Sorry, seems I didn't understand it?

He just gave you an example of what I was telling you in an
abstract way.

Jan

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-30  8:05                   ` Jan Beulich
@ 2013-10-30 15:41                     ` Liu, Jinsong
  0 siblings, 0 replies; 22+ messages in thread
From: Liu, Jinsong @ 2013-10-30 15:41 UTC (permalink / raw)
  To: Jan Beulich
  Cc: tim, keir, Nakajima, Jun, andrew.cooper3, Dong, Eddie,
	zhenzhong.duan, xen-devel, Auld, Will, suravee.suthikulpanit,
	sherry.hurwitz

Jan Beulich wrote:
>>>> On 29.10.13 at 17:52, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:
>> Jan Beulich wrote:
>>> No - consider a VM exit while in this mode where, in order to
>>> process it, the hypervisor or service domain touches guest memory.
>>> Such touching will happen with caches being used, and hence
>>> unwritten data may be left in the caches when exiting back to the
>>> guest when there's no wbinvd on the VM entry path. I don't think
>>> anything is being said explicitly anywhere on whether cache
>>> contents are being taken into consideration when CD=0 but PAT
>>> enforces UC. 
>>> 
>> 
>> I think we don't need to worry about the hypervisor touching guest
>> memory while the guest is in UC mode. I drafted 2 tests, monitoring
>> various events during the guest UC period.
>> 
>> Test 1 adds wbinvd before vmentry. While booting, the guest hangs
>> for 10s ~ 60s at the vcpu0 UC stage, with bunches of vmexits caused
>> by a lapic timer interrupt storm.
>> Test 2 is our current patch. It shows that during guest UC, vmexits
>> are only triggered by interrupt / CR access / MSR access / wbinvd.
>> With these vmexits the hypervisor will not touch any guest memory.
>> 
>> In test 1 the lapic timer interrupt storm is triggered by the
>> 'wbinvd' (without it everything works fine; I didn't dig out the
>> reason -- maybe wbinvd interacts with some tricky vmentry
>> microcode?), but anyway, test 2 demonstrates that the hypervisor
>> will not touch guest memory during the guest UC period.
> 
> That storm is a problem, indeed, but what you did isn't meaningful
> in any way: You just sampled a specific (presumably Linux) guest
> case, but code like this needs to cover the general case (i.e. without
> us knowing whether or when the guest might - temporarily or
> permanently - suppress caching).
> 
> So saying that in this specific case guest memory isn't being touched
> doesn't allow us to draw any conclusions other than that perhaps we
> need to monitor whether guest memory is being accessed (by setting
> a flag in the HVM copy/clear routines), and gate the invocation of
> wbinvd based on that. It's clear though that this would still not
> cover speculative reads.
> 
> Jan

OK, I will send out a protection patch, adding a flag that indicates guest memory access, to prevent the interrupt storm, though it doesn't seem elegant enough. Once the interrupt storm is root-caused and fixed, the protection flag can be removed.

Thanks,
Jinsong

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-21 15:55 [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling Liu, Jinsong
  2013-10-22 14:55 ` Jan Beulich
  2013-10-22 15:26 ` Tim Deegan
@ 2013-11-04  8:49 ` Zhenzhong Duan
  2013-11-04  9:05   ` kexec spin lock issue (was: Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling) Jan Beulich
  2013-11-06 12:30   ` [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling Jan Beulich
  2013-11-05 21:06 ` Keir Fraser
  3 siblings, 2 replies; 22+ messages in thread
From: Zhenzhong Duan @ 2013-11-04  8:49 UTC (permalink / raw)
  To: Liu, Jinsong, Jan Beulich, Tim Deegan, Andrew Cooper
  Cc: Keir Fraser, Nakajima, Jun, xen-devel, Auld, Will,
	suravee.suthikulpanit, sherry.hurwitz


On 2013-10-21 23:55, Liu, Jinsong wrote:
>  From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
> From: Liu Jinsong <jinsong.liu@intel.com>
> Date: Thu, 17 Oct 2013 05:49:23 +0800
> Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
>
> This patch solves XSA-60 security hole:
> 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need
> do nothing, since hardware snoop mechanism has ensured cache coherency.
>
> 2. For guest with VT-d but non-snooped, cache coherency can not be
> guaranteed by h/w snoop, therefore it need emulate UC type to guest:
> 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so that
> guest memory type are all UC.
> 2.2). if it works w/ shadow, drop all shadows so that any new ones would
> be created on demand w/ UC.
>
> This patch also fix a bug of shadow cr0.cd setting. Current shadow has a
> small window between cache flush and TLB invalidation, resulting in possilbe
> cache pollution. This patch pause vcpus so that no vcpus context involved
> into the window.
>
> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
> ---
>   xen/arch/x86/hvm/hvm.c            |   75 ++++++++++++++++++++++--------------
>   xen/arch/x86/hvm/vmx/vmcs.c       |    2 +-
>   xen/arch/x86/hvm/vmx/vmx.c        |   58 +++++++++++++++++++++++++++-
>   xen/common/domain.c               |   10 +++++
>   xen/include/asm-x86/hvm/hvm.h     |    1 +
>   xen/include/asm-x86/hvm/support.h |    2 +
>   xen/include/xen/sched.h           |    1 +
>   7 files changed, 117 insertions(+), 32 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 688a943..df021de 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -1701,6 +1701,40 @@ int hvm_mov_from_cr(unsigned int cr, unsigned int gpr)
>       return X86EMUL_UNHANDLEABLE;
>   }
>   
> +void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value)
> +{
> +    if ( value & X86_CR0_CD )
> +    {
> +        /* Entering no fill cache mode. */
> +        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> +        v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> +
> +        if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
> +        {
> +            domain_pause_nosync(v->domain);
> +
> +            /* Flush physical caches. */
> +            on_each_cpu(local_flush_cache, NULL, 1);
> +            hvm_set_uc_mode(v, 1);
> +
> +            domain_unpause(v->domain);
> +        }
> +        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> +    }
> +    else if ( !(value & X86_CR0_CD) &&
> +              (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
> +    {
> +        /* Exit from no fill cache mode. */
> +        spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> +        v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> +
> +        if ( domain_exit_uc_mode(v) )
> +            hvm_set_uc_mode(v, 0);
> +
> +        spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> +    }
> +}
> +
>   int hvm_set_cr0(unsigned long value)
>   {
>       struct vcpu *v = current;
> @@ -1784,35 +1818,18 @@ int hvm_set_cr0(unsigned long value)
>           }
>       }
>   
> -    if ( has_arch_mmios(v->domain) )
> -    {
> -        if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) )
> -        {
> -            /* Entering no fill cache mode. */
> -            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> -            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> -
> -            if ( !v->domain->arch.hvm_domain.is_in_uc_mode )
> -            {
> -                /* Flush physical caches. */
> -                on_each_cpu(local_flush_cache, NULL, 1);
> -                hvm_set_uc_mode(v, 1);
> -            }
> -            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> -        }
> -        else if ( !(value & (X86_CR0_CD | X86_CR0_NW)) &&
> -                  (v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
> -        {
> -            /* Exit from no fill cache mode. */
> -            spin_lock(&v->domain->arch.hvm_domain.uc_lock);
> -            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> -
> -            if ( domain_exit_uc_mode(v) )
> -                hvm_set_uc_mode(v, 0);
> -
> -            spin_unlock(&v->domain->arch.hvm_domain.uc_lock);
> -        }
> -    }
> +    /*
> +     * When cr0.cd setting
> +     * 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need not
> +     * do anything, since hardware snoop mechanism has ensured cache coherency;
> +     * 2. For guest with VT-d but non-snooped, cache coherency cannot be
> +     * guaranteed by h/w so need emulate UC memory type to guest.
> +     */
> +    if ( ((value ^ old_value) & X86_CR0_CD) &&
> +           has_arch_pdevs(v->domain) &&
> +           iommu_enabled && !iommu_snoop &&
> +           hvm_funcs.handle_cd )
> +        hvm_funcs.handle_cd(v, value);
>   
>       v->arch.hvm_vcpu.guest_cr[0] = value;
>       hvm_update_guest_cr(v, 0);
> diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
> index 6916c6d..7a01b1f 100644
> --- a/xen/arch/x86/hvm/vmx/vmcs.c
> +++ b/xen/arch/x86/hvm/vmx/vmcs.c
> @@ -921,7 +921,7 @@ static int construct_vmcs(struct vcpu *v)
>           vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS, MSR_TYPE_R | MSR_TYPE_W);
>           vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP, MSR_TYPE_R | MSR_TYPE_W);
>           vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP, MSR_TYPE_R | MSR_TYPE_W);
> -        if ( paging_mode_hap(d) )
> +        if ( paging_mode_hap(d) && (!iommu_enabled || iommu_snoop) )
>               vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT, MSR_TYPE_R | MSR_TYPE_W);
>       }
>   
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 6dedb29..d846a9c 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -642,6 +642,10 @@ static void vmx_ctxt_switch_to(struct vcpu *v)
>               __invept(INVEPT_SINGLE_CONTEXT, ept_get_eptp(ept_data), 0);
>       }
>   
> +    /* For guest cr0.cd setting, do not use potentially polluted cache */
> +    if ( unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
> +        wbinvd();
> +
>       vmx_restore_guest_msrs(v);
>       vmx_restore_dr(v);
>   }
> @@ -908,7 +912,8 @@ static unsigned long vmx_get_shadow_gs_base(struct vcpu *v)
>   
>   static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
>   {
> -    if ( !paging_mode_hap(v->domain) )
> +    if ( !paging_mode_hap(v->domain) ||
> +         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
>           return 0;
>   
>       vmx_vmcs_enter(v);
> @@ -919,7 +924,8 @@ static int vmx_set_guest_pat(struct vcpu *v, u64 gpat)
>   
>   static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
>   {
> -    if ( !paging_mode_hap(v->domain) )
> +    if ( !paging_mode_hap(v->domain) ||
> +         unlikely(v->arch.hvm_vcpu.cache_mode == NO_FILL_CACHE_MODE) )
>           return 0;
>   
>       vmx_vmcs_enter(v);
> @@ -928,6 +934,53 @@ static int vmx_get_guest_pat(struct vcpu *v, u64 *gpat)
>       return 1;
>   }
>   
> +static void vmx_handle_cd(struct vcpu *v, unsigned long value)
> +{
> +    if ( !paging_mode_hap(v->domain) )
> +    {
> +        /*
> +         * For shadow, 'load IA32_PAT' VM-entry control is 0, so it cannot
> +         * set guest memory type as UC via IA32_PAT. Xen drop all shadows
> +         * so that any new ones would be created on demand.
> +         */
> +        hvm_shadow_handle_cd(v, value);
> +    }
> +    else
> +    {
> +        u64 *pat = &v->arch.hvm_vcpu.pat_cr;
> +
> +        if ( value & X86_CR0_CD )
> +        {
> +            /*
> +             * For EPT, set guest IA32_PAT fields as UC so that guest
> +             * memory type are all UC.
> +             */
> +            u64 uc_pat =
> +                ((uint64_t)PAT_TYPE_UNCACHABLE)       |       /* PAT0 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 8)  |       /* PAT1 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 16) |       /* PAT2 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 24) |       /* PAT3 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 32) |       /* PAT4 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 40) |       /* PAT5 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 48) |       /* PAT6 */
> +                ((uint64_t)PAT_TYPE_UNCACHABLE << 56);        /* PAT7 */
> +
> +            vmx_get_guest_pat(v, pat);
> +            vmx_set_guest_pat(v, uc_pat);
> +
> +            wbinvd();               /* flush possibly polluted cache */
> +            hvm_asid_flush_vcpu(v); /* invalidate memory type cached in TLB */
> +            v->arch.hvm_vcpu.cache_mode = NO_FILL_CACHE_MODE;
> +        }
> +        else
> +        {
> +            v->arch.hvm_vcpu.cache_mode = NORMAL_CACHE_MODE;
> +            vmx_set_guest_pat(v, *pat);
> +            hvm_asid_flush_vcpu(v); /* no need to flush cache */
> +        }
> +    }
> +}
> +
>   static void vmx_set_tsc_offset(struct vcpu *v, u64 offset)
>   {
>       vmx_vmcs_enter(v);
> @@ -1550,6 +1603,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
>       .msr_read_intercept   = vmx_msr_read_intercept,
>       .msr_write_intercept  = vmx_msr_write_intercept,
>       .invlpg_intercept     = vmx_invlpg_intercept,
> +    .handle_cd            = vmx_handle_cd,
>       .set_info_guest       = vmx_set_info_guest,
>       .set_rdtsc_exiting    = vmx_set_rdtsc_exiting,
>       .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 5999779..ce20323 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -825,6 +825,16 @@ void domain_pause(struct domain *d)
>           vcpu_sleep_sync(v);
>   }
>   
> +void domain_pause_nosync(struct domain *d)
> +{
> +    struct vcpu *v;
> +
> +    atomic_inc(&d->pause_count);
> +
> +    for_each_vcpu( d, v )
> +        vcpu_sleep_nosync(v);
> +}
> +
>   void domain_unpause(struct domain *d)
>   {
>       struct vcpu *v;
> diff --git a/xen/include/asm-x86/hvm/hvm.h b/xen/include/asm-x86/hvm/hvm.h
> index 8dd2b40..c9afb56 100644
> --- a/xen/include/asm-x86/hvm/hvm.h
> +++ b/xen/include/asm-x86/hvm/hvm.h
> @@ -156,6 +156,7 @@ struct hvm_function_table {
>       int (*msr_read_intercept)(unsigned int msr, uint64_t *msr_content);
>       int (*msr_write_intercept)(unsigned int msr, uint64_t msr_content);
>       void (*invlpg_intercept)(unsigned long vaddr);
> +    void (*handle_cd)(struct vcpu *v, unsigned long value);
>       void (*set_info_guest)(struct vcpu *v);
>       void (*set_rdtsc_exiting)(struct vcpu *v, bool_t);
>   
> diff --git a/xen/include/asm-x86/hvm/support.h b/xen/include/asm-x86/hvm/support.h
> index 7ddc806..52aef1f 100644
> --- a/xen/include/asm-x86/hvm/support.h
> +++ b/xen/include/asm-x86/hvm/support.h
> @@ -130,6 +130,8 @@ void hvm_rdtsc_intercept(struct cpu_user_regs *regs);
>   
>   int __must_check hvm_handle_xsetbv(u32 index, u64 new_bv);
>   
> +void hvm_shadow_handle_cd(struct vcpu *v, unsigned long value);
> +
>   /* These functions all return X86EMUL return codes. */
>   int hvm_set_efer(uint64_t value);
>   int hvm_set_cr0(unsigned long value);
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 1765e18..82f522e 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -700,6 +700,7 @@ void vcpu_unblock(struct vcpu *v);
>   void vcpu_pause(struct vcpu *v);
>   void vcpu_pause_nosync(struct vcpu *v);
>   void domain_pause(struct domain *d);
> +void domain_pause_nosync(struct domain *d);
>   void vcpu_unpause(struct vcpu *v);
>   void domain_unpause(struct domain *d);
>   void domain_pause_by_systemcontroller(struct domain *d);
     We have a problem w/ the new Xen 4.4 hypervisor - dom0 comes up but 
hits the following bug and the box reboots.
     See the stack trace below.

...
Starting portmap: [  OK  ]
Starting NFS statd: [  OK  ]
Starting RPC idmapd: [  OK  ]
(XEN) Xen BUG at spinlock.c:48
(XEN) ----[ Xen-4.4-unstable  x86_64  debug=y  Not tainted ]----
(XEN) CPU:    2
(XEN) RIP:    e008:[<ffff82d080127355>] check_lock+0x3d/0x43
(XEN) RFLAGS: 0000000000010046   CONTEXT: hypervisor
(XEN) rax: 0000000000000000   rbx: ffff82d08028ab08   rcx: 0000000000000001
(XEN) rdx: 0000000000000000   rsi: 0000000000000001   rdi: ffff82d08028ab0c
(XEN) rbp: ffff83203fda7c50   rsp: ffff83203fda7c50   r8: ffff82d0802dfc88
(XEN) r9:  00000000deadbeef   r10: ffff82d08023e120   r11: 0000000000000206
(XEN) r12: ffff83007f481ff0   r13: 0000000000000000   r14: 000ffff82cfffd5d
(XEN) r15: ffff82cfffd5e000   cr0: 0000000080050033   cr4: 00000000000026f0
(XEN) cr3: 000000c083a2a000   cr2: 000000000040e000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
(XEN) Xen stack trace from rsp=ffff83203fda7c50:
(XEN)    ffff83203fda7c68 ffff82d08012750d ffff83007f74e000 ffff83203fda7d08
(XEN)    ffff82d0801717bf ffff83203fda7ca8 010183203fda7d88 0000000000000163
(XEN)    000000013fda7d88 ffff832000000163 ffff83203fda7d28 0000000000000000
(XEN)    000000000c082433 01ffffffffffffff ffff83007f4839f8 ffff82d08026ff20
(XEN)    ffff83203fdcafd0 0000000000000000 00000000000002a2 ffff82d0802dfc90
(XEN)    0000000000000001 000000c082432000 00000000000002a2 ffff83203fda7d18
(XEN)    ffff82d080171e8a ffff83203fda7d58 ffff82d08019f5f5 0000000000000002
(XEN)    ffff82d0802dfc88 ffff83203fda7db8 00000000ffffffea 0000000000000001
(XEN)    0000000000000000 ffff83203fda7da8 ffff82d080112f8b 0000000000000002
(XEN)    ffff83203fdac6c8 0000000200000005 0000000000000000 0000000000000001
(XEN)    0000000000000000 ffff8807bc799e68 0000000000000246 ffff83203fda7ee8
(XEN)    ffff82d080113cce 0000000000000001 000000c082432000 0000000000000000
(XEN)    000000c085c1b000 0000000000000000 000000c085c1a000 0000000000000000
(XEN)    000000c085c19000 0000000000000000 000000c085c18000 0000000000000000
(XEN)    000000c085eb7000 0000000000000000 000000c085eb6000 0000000000000000
(XEN)    000000c085eb5000 0000000000000000 000000c082433000 000000c085eb4002
(XEN)    0000000008f59690 ffff82d0801274bf ffff83007f2db060 ffff83203fda7e88
(XEN)    ffff82d08018d8ee ffff83203fd9c330 ffff83203fda7f18 ffff83203fda7ef8
(XEN)    ffff82d080220550 0000000000000000 0000000008fff000 0000000000000044
(XEN)    0000000000000000 ffffffff8125bd07 ffff83007f2db000 ffff8807bc684000
(XEN) Xen call trace:
(XEN)    [<ffff82d080127355>] check_lock+0x3d/0x43
(XEN)    [<ffff82d08012750d>] _spin_lock+0x11/0x48
(XEN)    [<ffff82d0801717bf>] map_pages_to_xen+0xcab/0x1052
(XEN)    [<ffff82d080171e8a>] __set_fixmap+0x30/0x32
(XEN)    [<ffff82d08019f5f5>] machine_kexec_load+0x66/0xa1
(XEN)    [<ffff82d080112f8b>] kexec_load_unload_internal+0xb9/0x2cc
(XEN)    [<ffff82d080113cce>] do_kexec_op_internal+0x391/0x4b2
(XEN)    [<ffff82d080113e0d>] do_kexec_op+0xe/0x12
(XEN)    [<ffff82d080225c7b>] syscall_enter+0xeb/0x145
(XEN)
(XEN)
(XEN) ****************************************
(XEN) Panic on CPU 2:
(XEN) Xen BUG at spinlock.c:48
(XEN) ****************************************
(XEN)
(XEN) Reboot in five seconds...
(XEN) Resetting with ACPI MEMORY or I/O RESET_REG.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: kexec spin lock issue (was: Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling)
  2013-11-04  8:49 ` Zhenzhong Duan
@ 2013-11-04  9:05   ` Jan Beulich
  2013-11-06 12:30   ` [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling Jan Beulich
  1 sibling, 0 replies; 22+ messages in thread
From: Jan Beulich @ 2013-11-04  9:05 UTC (permalink / raw)
  To: zhenzhong.duan
  Cc: Jinsong Liu, Keir Fraser, suravee.suthikulpanit, Andrew Cooper,
	Tim Deegan, xen-devel, Will Auld, Jun Nakajima, sherry.hurwitz

>>> On 04.11.13 at 09:49, Zhenzhong Duan <zhenzhong.duan@oracle.com> wrote:
>      We have a problem w/ the new Xen 4.4 hypervisor - dom0 comes up but 
> hits the following bug and the box reboots.
>      See the stack trace below.

I'm sorry, but please put new problems in new threads. I'll try to
take a look later today. Also, from the stack trace it's clear that
kexec is involved here, yet you clearly don't need this for the
test(s) you want to do in the original context.

Jan

> ...
> Starting portmap: [  OK  ]
> Starting NFS statd: [  OK  ]
> Starting RPC idmapd: [  OK  ]
> (XEN) Xen BUG at spinlock.c:48
> (XEN) ----[ Xen-4.4-unstable  x86_64  debug=y  Not tainted ]----
> (XEN) CPU:    2
> (XEN) RIP:    e008:[<ffff82d080127355>] check_lock+0x3d/0x43
> (XEN) RFLAGS: 0000000000010046   CONTEXT: hypervisor
> (XEN) rax: 0000000000000000   rbx: ffff82d08028ab08   rcx: 0000000000000001
> (XEN) rdx: 0000000000000000   rsi: 0000000000000001   rdi: ffff82d08028ab0c
> (XEN) rbp: ffff83203fda7c50   rsp: ffff83203fda7c50   r8: ffff82d0802dfc88
> (XEN) r9:  00000000deadbeef   r10: ffff82d08023e120   r11: 0000000000000206
> (XEN) r12: ffff83007f481ff0   r13: 0000000000000000   r14: 000ffff82cfffd5d
> (XEN) r15: ffff82cfffd5e000   cr0: 0000000080050033   cr4: 00000000000026f0
> (XEN) cr3: 000000c083a2a000   cr2: 000000000040e000
> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
> (XEN) Xen stack trace from rsp=ffff83203fda7c50:
> (XEN)    ffff83203fda7c68 ffff82d08012750d ffff83007f74e000 ffff83203fda7d08
> (XEN)    ffff82d0801717bf ffff83203fda7ca8 010183203fda7d88 0000000000000163
> (XEN)    000000013fda7d88 ffff832000000163 ffff83203fda7d28 0000000000000000
> (XEN)    000000000c082433 01ffffffffffffff ffff83007f4839f8 ffff82d08026ff20
> (XEN)    ffff83203fdcafd0 0000000000000000 00000000000002a2 ffff82d0802dfc90
> (XEN)    0000000000000001 000000c082432000 00000000000002a2 ffff83203fda7d18
> (XEN)    ffff82d080171e8a ffff83203fda7d58 ffff82d08019f5f5 0000000000000002
> (XEN)    ffff82d0802dfc88 ffff83203fda7db8 00000000ffffffea 0000000000000001
> (XEN)    0000000000000000 ffff83203fda7da8 ffff82d080112f8b 0000000000000002
> (XEN)    ffff83203fdac6c8 0000000200000005 0000000000000000 0000000000000001
> (XEN)    0000000000000000 ffff8807bc799e68 0000000000000246 ffff83203fda7ee8
> (XEN)    ffff82d080113cce 0000000000000001 000000c082432000 0000000000000000
> (XEN)    000000c085c1b000 0000000000000000 000000c085c1a000 0000000000000000
> (XEN)    000000c085c19000 0000000000000000 000000c085c18000 0000000000000000
> (XEN)    000000c085eb7000 0000000000000000 000000c085eb6000 0000000000000000
> (XEN)    000000c085eb5000 0000000000000000 000000c082433000 000000c085eb4002
> (XEN)    0000000008f59690 ffff82d0801274bf ffff83007f2db060 ffff83203fda7e88
> (XEN)    ffff82d08018d8ee ffff83203fd9c330 ffff83203fda7f18 ffff83203fda7ef8
> (XEN)    ffff82d080220550 0000000000000000 0000000008fff000 0000000000000044
> (XEN)    0000000000000000 ffffffff8125bd07 ffff83007f2db000 ffff8807bc684000
> (XEN) Xen call trace:
> (XEN)    [<ffff82d080127355>] check_lock+0x3d/0x43
> (XEN)    [<ffff82d08012750d>] _spin_lock+0x11/0x48
> (XEN)    [<ffff82d0801717bf>] map_pages_to_xen+0xcab/0x1052
> (XEN)    [<ffff82d080171e8a>] __set_fixmap+0x30/0x32
> (XEN)    [<ffff82d08019f5f5>] machine_kexec_load+0x66/0xa1
> (XEN)    [<ffff82d080112f8b>] kexec_load_unload_internal+0xb9/0x2cc
> (XEN)    [<ffff82d080113cce>] do_kexec_op_internal+0x391/0x4b2
> (XEN)    [<ffff82d080113e0d>] do_kexec_op+0xe/0x12
> (XEN)    [<ffff82d080225c7b>] syscall_enter+0xeb/0x145
> (XEN)
> (XEN)
> (XEN) ****************************************
> (XEN) Panic on CPU 2:
> (XEN) Xen BUG at spinlock.c:48
> (XEN) ****************************************
> (XEN)
> (XEN) Reboot in five seconds...
> (XEN) Resetting with ACPI MEMORY or I/O RESET_REG.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-10-21 15:55 [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling Liu, Jinsong
                   ` (2 preceding siblings ...)
  2013-11-04  8:49 ` Zhenzhong Duan
@ 2013-11-05 21:06 ` Keir Fraser
  3 siblings, 0 replies; 22+ messages in thread
From: Keir Fraser @ 2013-11-05 21:06 UTC (permalink / raw)
  To: Liu, Jinsong, Jan Beulich, Tim Deegan, Andrew Cooper
  Cc: Keir Fraser, Nakajima, Jun, zhenzhong.duan, xen-devel, Auld,
	Will, suravee.suthikulpanit, sherry.hurwitz

On 21/10/2013 16:55, "Liu, Jinsong" <jinsong.liu@intel.com> wrote:

> From 4ff1e2955f67954e60562b29a00adea89e5b93ae Mon Sep 17 00:00:00 2001
> From: Liu Jinsong <jinsong.liu@intel.com>
> Date: Thu, 17 Oct 2013 05:49:23 +0800
> Subject: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
> 
> This patch solves XSA-60 security hole:
> 1. For guest w/o VT-d, and for guest with VT-d but snooped, Xen need
> do nothing, since hardware snoop mechanism has ensured cache coherency.
> 
> 2. For guest with VT-d but non-snooped, cache coherency can not be
> guaranteed by h/w snoop, therefore it need emulate UC type to guest:
> 2.1). if it works w/ Intel EPT, set guest IA32_PAT fields as UC so that
> guest memory type are all UC.
> 2.2). if it works w/ shadow, drop all shadows so that any new ones would
> be created on demand w/ UC.
> 
> This patch also fix a bug of shadow cr0.cd setting. Current shadow has a
> small window between cache flush and TLB invalidation, resulting in possilbe
> cache pollution. This patch pause vcpus so that no vcpus context involved
> into the window. 
> 
> Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>

Acked-by: Keir Fraser <keir@xen.org>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling
  2013-11-04  8:49 ` Zhenzhong Duan
  2013-11-04  9:05   ` kexec spin lock issue (was: Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling) Jan Beulich
@ 2013-11-06 12:30   ` Jan Beulich
  1 sibling, 0 replies; 22+ messages in thread
From: Jan Beulich @ 2013-11-06 12:30 UTC (permalink / raw)
  To: zhenzhong.duan
  Cc: Jinsong Liu, Keir Fraser, suravee.suthikulpanit, Andrew Cooper,
	Tim Deegan, xen-devel, Will Auld, Jun Nakajima, sherry.hurwitz

>>> On 04.11.13 at 09:49, Zhenzhong Duan <zhenzhong.duan@oracle.com> wrote:
>      We have a problem w/ the new Xen 4.4 hypervisor - dom0 comes up but 
> hits the following bug and the box reboots.
>      See the stack trace below.
> 
> ...
> Starting portmap: [  OK  ]
> Starting NFS statd: [  OK  ]
> Starting RPC idmapd: [  OK  ]
> (XEN) Xen BUG at spinlock.c:48
> (XEN) ----[ Xen-4.4-unstable  x86_64  debug=y  Not tainted ]----
> (XEN) CPU:    2
> (XEN) RIP:    e008:[<ffff82d080127355>] check_lock+0x3d/0x43
> (XEN) RFLAGS: 0000000000010046   CONTEXT: hypervisor
> (XEN) rax: 0000000000000000   rbx: ffff82d08028ab08   rcx: 0000000000000001
> (XEN) rdx: 0000000000000000   rsi: 0000000000000001   rdi: ffff82d08028ab0c
> (XEN) rbp: ffff83203fda7c50   rsp: ffff83203fda7c50   r8: ffff82d0802dfc88
> (XEN) r9:  00000000deadbeef   r10: ffff82d08023e120   r11: 0000000000000206
> (XEN) r12: ffff83007f481ff0   r13: 0000000000000000   r14: 000ffff82cfffd5d
> (XEN) r15: ffff82cfffd5e000   cr0: 0000000080050033   cr4: 00000000000026f0
> (XEN) cr3: 000000c083a2a000   cr2: 000000000040e000
> (XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e010   cs: e008
> (XEN) Xen stack trace from rsp=ffff83203fda7c50:
> (XEN)    ffff83203fda7c68 ffff82d08012750d ffff83007f74e000 ffff83203fda7d08
> (XEN)    ffff82d0801717bf ffff83203fda7ca8 010183203fda7d88 0000000000000163
> (XEN)    000000013fda7d88 ffff832000000163 ffff83203fda7d28 0000000000000000
> (XEN)    000000000c082433 01ffffffffffffff ffff83007f4839f8 ffff82d08026ff20
> (XEN)    ffff83203fdcafd0 0000000000000000 00000000000002a2 ffff82d0802dfc90
> (XEN)    0000000000000001 000000c082432000 00000000000002a2 ffff83203fda7d18
> (XEN)    ffff82d080171e8a ffff83203fda7d58 ffff82d08019f5f5 0000000000000002
> (XEN)    ffff82d0802dfc88 ffff83203fda7db8 00000000ffffffea 0000000000000001
> (XEN)    0000000000000000 ffff83203fda7da8 ffff82d080112f8b 0000000000000002
> (XEN)    ffff83203fdac6c8 0000000200000005 0000000000000000 0000000000000001
> (XEN)    0000000000000000 ffff8807bc799e68 0000000000000246 ffff83203fda7ee8
> (XEN)    ffff82d080113cce 0000000000000001 000000c082432000 0000000000000000
> (XEN)    000000c085c1b000 0000000000000000 000000c085c1a000 0000000000000000
> (XEN)    000000c085c19000 0000000000000000 000000c085c18000 0000000000000000
> (XEN)    000000c085eb7000 0000000000000000 000000c085eb6000 0000000000000000
> (XEN)    000000c085eb5000 0000000000000000 000000c082433000 000000c085eb4002
> (XEN)    0000000008f59690 ffff82d0801274bf ffff83007f2db060 ffff83203fda7e88
> (XEN)    ffff82d08018d8ee ffff83203fd9c330 ffff83203fda7f18 ffff83203fda7ef8
> (XEN)    ffff82d080220550 0000000000000000 0000000008fff000 0000000000000044
> (XEN)    0000000000000000 ffffffff8125bd07 ffff83007f2db000 ffff8807bc684000
> (XEN) Xen call trace:
> (XEN)    [<ffff82d080127355>] check_lock+0x3d/0x43
> (XEN)    [<ffff82d08012750d>] _spin_lock+0x11/0x48
> (XEN)    [<ffff82d0801717bf>] map_pages_to_xen+0xcab/0x1052
> (XEN)    [<ffff82d080171e8a>] __set_fixmap+0x30/0x32
> (XEN)    [<ffff82d08019f5f5>] machine_kexec_load+0x66/0xa1
> (XEN)    [<ffff82d080112f8b>] kexec_load_unload_internal+0xb9/0x2cc
> (XEN)    [<ffff82d080113cce>] do_kexec_op_internal+0x391/0x4b2
> (XEN)    [<ffff82d080113e0d>] do_kexec_op+0xe/0x12
> (XEN)    [<ffff82d080225c7b>] syscall_enter+0xeb/0x145
> (XEN)
> (XEN)
> (XEN) ****************************************
> (XEN) Panic on CPU 2:
> (XEN) Xen BUG at spinlock.c:48
> (XEN) ****************************************

The patch at
http://lists.xenproject.org/archives/html/xen-devel/2013-11/msg00659.html
should help.

Jan

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2013-11-06 12:30 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-10-21 15:55 [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling Liu, Jinsong
2013-10-22 14:55 ` Jan Beulich
2013-10-23  8:48   ` DuanZhenzhong
2013-10-23 16:29   ` Nakajima, Jun
2013-10-23 16:38     ` Jan Beulich
2013-10-24 16:19       ` Liu, Jinsong
2013-10-24 16:39         ` Liu, Jinsong
2013-10-28  7:29           ` Jan Beulich
2013-10-28  8:31             ` Liu, Jinsong
2013-10-28  9:29               ` Jan Beulich
2013-10-29 16:52                 ` Liu, Jinsong
2013-10-29 17:20                   ` Andrew Cooper
2013-10-30 15:21                     ` Liu, Jinsong
2013-10-30 15:27                       ` Jan Beulich
2013-10-30  8:05                   ` Jan Beulich
2013-10-30 15:41                     ` Liu, Jinsong
2013-10-22 15:26 ` Tim Deegan
2013-10-23 10:16   ` Andrew Cooper
2013-11-04  8:49 ` Zhenzhong Duan
2013-11-04  9:05   ` kexec spin lock issue (was: Re: [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling) Jan Beulich
2013-11-06 12:30   ` [PATCH 3/3 V3] XSA-60 security hole: cr0.cd handling Jan Beulich
2013-11-05 21:06 ` Keir Fraser
