* [PATCH v2] Enable SMEP CPU feature support for XEN hypervisor
@ 2011-06-03 13:57 Li, Xin
  2011-06-03 17:23 ` Keir Fraser
  0 siblings, 1 reply; 14+ messages in thread
From: Li, Xin @ 2011-06-03 13:57 UTC (permalink / raw)
  To: 'Keir Fraser (keir.xen@gmail.com)'; +Cc: xen-devel

New Intel processors support SMEP (Supervisor Mode Execution Protection). SMEP
prevents software running with CPL < 3 (supervisor mode) from fetching
instructions from any linear address with a valid translation for which the U/S
flag (bit 2) is 1 in every paging-structure entry controlling the translation
for that linear address.
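
As a minimal user-space sketch (not part of the patch): SMEP support is
reported by CPUID leaf 7, subleaf 0, EBX bit 7, matching the
X86_FEATURE_SMEP definition added below; CR4.SMEP is bit 20
(X86_CR4_SMEP) and can only be set by ring 0 code such as Xen.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;

    /* CPUID leaf 7 must exist before it can be queried. */
    if ( __get_cpuid_max(0, NULL) < 7 ) {
        puts("CPUID leaf 7 not available");
        return 1;
    }

    __cpuid_count(7, 0, eax, ebx, ecx, edx);
    printf("SMEP %ssupported\n", (ebx & (1u << 7)) ? "" : "not ");
    return 0;
}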

This patch enables SMEP in Xen to protect the hypervisor from executing PV
guest instructions, i.e. instructions whose translations have the U/S flag set
in every controlling paging-structure entry.

A PV guest that triggers an SMEP fault is crashed.

Signed-off-by: Yang Wei <wei.y.yang@intel.com>
Signed-off-by: Shan Haitao <haitao.shan@intel.com>
Signed-off-by: Li Xin <xin.li@intel.com>

diff -r 0791661a32d8 xen/arch/x86/cpu/common.c
--- a/xen/arch/x86/cpu/common.c	Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/arch/x86/cpu/common.c	Fri Jun 03 19:59:43 2011 +0800
@@ -222,6 +222,22 @@ static void __init early_cpu_detect(void
 	c->x86_capability[4] = cap4;
 }
 
+/* nosmep: if true, Intel SMEP is disabled. */
+static bool_t disable_smep __cpuinitdata;
+boolean_param("nosmep", disable_smep);
+
+static void __cpuinit setup_smep(struct cpuinfo_x86 *c)
+{
+	if ( cpu_has(c, X86_FEATURE_SMEP) ) {
+		if ( unlikely(disable_smep) )
+			setup_clear_cpu_cap(X86_FEATURE_SMEP);
+		else {
+			set_in_cr4(X86_CR4_SMEP);
+			mmu_cr4_features |= X86_CR4_SMEP;
+		}
+	}
+}
+
 void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 	u32 tfms, xlvl, capability, excap, ebx;
@@ -268,6 +284,8 @@ void __cpuinit generic_identify(struct c
 		c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx;
 	}
 
+	setup_smep(c);
+
 	early_intel_workaround(c);
 
 #ifdef CONFIG_X86_HT
diff -r 0791661a32d8 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c	Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/arch/x86/traps.c	Fri Jun 03 19:59:43 2011 +0800
@@ -1139,7 +1139,13 @@ static int handle_gdt_ldt_mapping_fault(
     (((va) >= HYPERVISOR_VIRT_START))
 #endif
 
-static int __spurious_page_fault(
+enum pf_type {
+    real_page_fault,
+    smep_fault,
+    spurious_fault
+};
+
+static enum pf_type __page_fault_type(
     unsigned long addr, unsigned int error_code)
 {
     unsigned long mfn, cr3 = read_cr3();
@@ -1151,7 +1157,7 @@ static int __spurious_page_fault(
 #endif
     l2_pgentry_t l2e, *l2t;
     l1_pgentry_t l1e, *l1t;
-    unsigned int required_flags, disallowed_flags;
+    unsigned int required_flags, disallowed_flags, u_flag;
 
     /*
      * We do not take spurious page faults in IRQ handlers as we do not
@@ -1159,11 +1165,11 @@ static int __spurious_page_fault(
      * map_domain_page() is not IRQ-safe.
      */
     if ( in_irq() )
-        return 0;
+        return real_page_fault;
 
     /* Reserved bit violations are never spurious faults. */
     if ( error_code & PFEC_reserved_bit )
-        return 0;
+        return real_page_fault;
 
     required_flags  = _PAGE_PRESENT;
     if ( error_code & PFEC_write_access )
@@ -1175,6 +1181,8 @@ static int __spurious_page_fault(
     if ( error_code & PFEC_insn_fetch )
         disallowed_flags |= _PAGE_NX_BIT;
 
+    u_flag = _PAGE_USER;
+
     mfn = cr3 >> PAGE_SHIFT;
 
 #if CONFIG_PAGING_LEVELS >= 4
@@ -1184,7 +1192,9 @@ static int __spurious_page_fault(
     unmap_domain_page(l4t);
     if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
          (l4e_get_flags(l4e) & disallowed_flags) )
-        return 0;
+        return real_page_fault;
+
+    u_flag &= l4e_get_flags(l4e);
 #endif
 
 #if CONFIG_PAGING_LEVELS >= 3
@@ -1197,13 +1207,23 @@ static int __spurious_page_fault(
     unmap_domain_page(l3t);
 #if CONFIG_PAGING_LEVELS == 3
     if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
-        return 0;
+        return real_page_fault;
 #else
     if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) ||
          (l3e_get_flags(l3e) & disallowed_flags) )
-        return 0;
-    if ( l3e_get_flags(l3e) & _PAGE_PSE )
-        return 1;
+        return real_page_fault;
+
+    u_flag &= l3e_get_flags(l3e);
+
+    if ( l3e_get_flags(l3e) & _PAGE_PSE ) {
+        /* SMEP fault error code 10001b */
+        if ( (error_code & PFEC_insn_fetch) &&
+             !(error_code & PFEC_user_mode) &&
+             cpu_has_smep && u_flag )
+            return smep_fault;
+
+        return spurious_fault;
+    }
 #endif
 #endif
 
@@ -1213,9 +1233,19 @@ static int __spurious_page_fault(
     unmap_domain_page(l2t);
     if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
          (l2e_get_flags(l2e) & disallowed_flags) )
-        return 0;
-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
-        return 1;
+        return real_page_fault;
+
+    u_flag &= l2e_get_flags(l2e);
+
+    if ( l2e_get_flags(l2e) & _PAGE_PSE ) {
+        /* SMEP fault error code 10001b */
+        if ( (error_code & PFEC_insn_fetch) &&
+             !(error_code & PFEC_user_mode) &&
+             cpu_has_smep && u_flag )
+            return smep_fault;
+
+        return spurious_fault;
+    }
 
     l1t = map_domain_page(mfn);
     l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
@@ -1223,26 +1253,37 @@ static int __spurious_page_fault(
     unmap_domain_page(l1t);
     if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
          (l1e_get_flags(l1e) & disallowed_flags) )
-        return 0;
-
-    return 1;
+        return real_page_fault;
+
+    u_flag &= l1e_get_flags(l1e);
+
+    /* SMEP fault error code 10001b */
+    if ( (error_code & PFEC_insn_fetch) &&
+         !(error_code & PFEC_user_mode) &&
+         cpu_has_smep && u_flag )
+        return smep_fault;
+
+    return spurious_fault;
 }
 
 static int spurious_page_fault(
     unsigned long addr, unsigned int error_code)
 {
     unsigned long flags;
-    int           is_spurious;
+    enum pf_type  pf_type;
 
     /*
      * Disabling interrupts prevents TLB flushing, and hence prevents
      * page tables from becoming invalid under our feet during the walk.
      */
     local_irq_save(flags);
-    is_spurious = __spurious_page_fault(addr, error_code);
+    pf_type = __page_fault_type(addr, error_code);
     local_irq_restore(flags);
 
-    return is_spurious;
+    if ( pf_type == smep_fault )
+        domain_crash_synchronous();
+
+    return (pf_type == spurious_fault);
 }
 
 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
diff -r 0791661a32d8 xen/include/asm-x86/cpufeature.h
--- a/xen/include/asm-x86/cpufeature.h	Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/include/asm-x86/cpufeature.h	Fri Jun 03 19:59:43 2011 +0800
@@ -141,8 +141,9 @@
 #define X86_FEATURE_TBM         (6*32+21) /* trailing bit manipulations */
 #define X86_FEATURE_TOPOEXT     (6*32+22) /* topology extensions CPUID leafs */
 
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 7 */
 #define X86_FEATURE_FSGSBASE	(7*32+ 0) /* {RD,WR}{FS,GS}BASE instructions */
+#define X86_FEATURE_SMEP	(7*32+ 7) /* Supervisor Mode Execution Protection */
 
 #define cpu_has(c, bit)		test_bit(bit, (c)->x86_capability)
 #define boot_cpu_has(bit)	test_bit(bit, boot_cpu_data.x86_capability)
@@ -202,6 +203,8 @@
 #define cpu_has_fsgsbase	boot_cpu_has(X86_FEATURE_FSGSBASE)
 #endif
 
+#define cpu_has_smep            boot_cpu_has(X86_FEATURE_SMEP)
+
 #define cpu_has_ffxsr           ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) \
                                  && boot_cpu_has(X86_FEATURE_FFXSR))
 
diff -r 0791661a32d8 xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h	Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/include/asm-x86/domain.h	Fri Jun 03 19:59:43 2011 +0800
@@ -527,12 +527,14 @@ unsigned long pv_guest_cr4_fixup(const s
 /* Convert between guest-visible and real CR4 values. */
 #define pv_guest_cr4_to_real_cr4(v)                         \
     (((v)->arch.pv_vcpu.ctrlreg[4]                          \
-      | (mmu_cr4_features & (X86_CR4_PGE | X86_CR4_PSE))    \
-      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)         \
-      | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0))              \
-      & ~X86_CR4_DE)
-#define real_cr4_to_pv_guest_cr4(c) \
-    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD | X86_CR4_OSXSAVE))
+      | (mmu_cr4_features                                   \
+         & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP))      \
+      | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0)          \
+      | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0))          \
+     & ~X86_CR4_DE)
+#define real_cr4_to_pv_guest_cr4(c)                         \
+    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD        \
+             | X86_CR4_OSXSAVE | X86_CR4_SMEP))
 
 void domain_cpuid(struct domain *d,
                   unsigned int  input,
diff -r 0791661a32d8 xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h	Thu Jun 02 18:46:35 2011 +0100
+++ b/xen/include/asm-x86/processor.h	Fri Jun 03 19:59:43 2011 +0800
@@ -85,6 +85,7 @@
 #define X86_CR4_SMXE		0x4000  /* enable SMX */
 #define X86_CR4_FSGSBASE	0x10000 /* enable {rd,wr}{fs,gs}base */
 #define X86_CR4_OSXSAVE	0x40000 /* enable XSAVE/XRSTOR */
+#define X86_CR4_SMEP		0x100000/* enable SMEP */
 
 /*
  * Trap/fault mnemonics.

* Re: [PATCH v2] Enable SMEP CPU feature support for XEN hypervisor
@ 2011-06-03 14:36 Jan Beulich
  2011-06-03 16:18 ` Li, Xin
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2011-06-03 14:36 UTC (permalink / raw)
  To: keir.xen, xin.li; +Cc: xen-devel


>>> "Li, Xin"  06/03/11 3:59 PM >>>
>--- a/xen/arch/x86/cpu/common.c    Thu Jun 02 18:46:35 2011 +0100
>+++ b/xen/arch/x86/cpu/common.c    Fri Jun 03 19:59:43 2011 +0800
>@@ -222,6 +222,22 @@ static void __init early_cpu_detect(void
>    c->x86_capability[4] = cap4;
>}
 >
>+/* nosmep: if true, Intel SMEP is disabled. */
>+static bool_t disable_smep __cpuinitdata;

__initdata

>+boolean_param("nosmep", disable_smep);
>+
>+static void __cpuinit setup_smep(struct cpuinfo_x86 *c)

__init

>+{
>+    if ( cpu_has(c, X86_FEATURE_SMEP) ) {
>+        if ( unlikely(disable_smep) )
>+            setup_clear_cpu_cap(X86_FEATURE_SMEP);
>+        else {
>+            set_in_cr4(X86_CR4_SMEP);
>+            mmu_cr4_features |= X86_CR4_SMEP;

Why? We told you on the previous patch version that
set_in_cr4() already does this.
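
For context, a from-memory sketch of what set_in_cr4() does -- illustrative,
not a quotation of the Xen source:

void set_in_cr4(unsigned long mask)
{
    mmu_cr4_features |= mask;        /* recorded for other CPUs/resume */
    write_cr4(read_cr4() | mask);    /* applied to the current CPU     */
}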

>+        }
>+    }
>+}
>+
>void __cpuinit generic_identify(struct cpuinfo_x86 * c)
>{
>    u32 tfms, xlvl, capability, excap, ebx;
>@@ -268,6 +284,8 @@ void __cpuinit generic_identify(struct c
>        c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx;
>    }
 >
>+    setup_smep(c);

Still misplaced.

>+
>    early_intel_workaround(c);
 >
>#ifdef CONFIG_X86_HT
>diff -r 0791661a32d8 xen/arch/x86/traps.c
>--- a/xen/arch/x86/traps.c    Thu Jun 02 18:46:35 2011 +0100
>+++ b/xen/arch/x86/traps.c    Fri Jun 03 19:59:43 2011 +0800
>@@ -1139,7 +1139,13 @@ static int handle_gdt_ldt_mapping_fault(
>(((va) >= HYPERVISOR_VIRT_START))
>#endif
 >
>-static int __spurious_page_fault(
>+enum pf_type {
>+    real_page_fault,
>+    smep_fault,
>+    spurious_fault
>+};
>+
>+static enum pf_type __page_fault_type(
>unsigned long addr, unsigned int error_code)
>{
>unsigned long mfn, cr3 = read_cr3();
>@@ -1151,7 +1157,7 @@ static int __spurious_page_fault(
>#endif
>l2_pgentry_t l2e, *l2t;
>l1_pgentry_t l1e, *l1t;
>-    unsigned int required_flags, disallowed_flags;
>+    unsigned int required_flags, disallowed_flags, u_flag;
 >
>/*
>* We do not take spurious page faults in IRQ handlers as we do not
>@@ -1159,11 +1165,11 @@ static int __spurious_page_fault(
>* map_domain_page() is not IRQ-safe.
>*/
>if ( in_irq() )
>-        return 0;
>+        return real_page_fault;
 >
>/* Reserved bit violations are never spurious faults. */
>if ( error_code & PFEC_reserved_bit )
>-        return 0;
>+        return real_page_fault;
 >
>required_flags  = _PAGE_PRESENT;
>if ( error_code & PFEC_write_access )
>@@ -1175,6 +1181,8 @@ static int __spurious_page_fault(
>if ( error_code & PFEC_insn_fetch )
>disallowed_flags |= _PAGE_NX_BIT;
 >
>+    u_flag = _PAGE_USER;

How about

u_flag = cpu_has_smep ? _PAGE_USER : 0;

avoiding the repeated cpu_has_smep checks below.
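
Spelled out, the suggested pattern is (an illustrative fragment, not the
applied patch):

u_flag = cpu_has_smep ? _PAGE_USER : 0;

/* ... at each level of the walk ... */
u_flag &= l4e_get_flags(l4e);    /* stays nonzero only while U/S == 1 */

/* ... final classification ... */
if ( (error_code & PFEC_insn_fetch) &&
     !(error_code & PFEC_user_mode) &&
     u_flag )                    /* nonzero u_flag implies cpu_has_smep */
    return smep_fault;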

>+
>mfn = cr3 >> PAGE_SHIFT;
 >
>#if CONFIG_PAGING_LEVELS >= 4
>@@ -1184,7 +1192,9 @@ static int __spurious_page_fault(
>unmap_domain_page(l4t);
>if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
>(l4e_get_flags(l4e) & disallowed_flags) )
>-        return 0;
>+        return real_page_fault;
>+
>+    u_flag &= l4e_get_flags(l4e);
>#endif
 >
>#if CONFIG_PAGING_LEVELS >= 3
>@@ -1197,13 +1207,23 @@ static int __spurious_page_fault(
>unmap_domain_page(l3t);
>#if CONFIG_PAGING_LEVELS == 3
>if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
>-        return 0;
>+        return real_page_fault;
>#else
>if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) ||
>(l3e_get_flags(l3e) & disallowed_flags) )
>-        return 0;
>-    if ( l3e_get_flags(l3e) & _PAGE_PSE )
>-        return 1;
>+        return real_page_fault;
>+
>+    u_flag &= l3e_get_flags(l3e);
>+
>+    if ( l3e_get_flags(l3e) & _PAGE_PSE ) {
>+        /* SMEP fault error code 10001b */
>+        if ( (error_code & PFEC_insn_fetch) &&
>+             !(error_code & PFEC_user_mode) &&
>+             cpu_has_smep && u_flag )
>+            return smep_fault;
>+
>+        return spurious_fault;
>+    }
>#endif
>#endif
 >
>@@ -1213,9 +1233,19 @@ static int __spurious_page_fault(
>unmap_domain_page(l2t);
>if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
>(l2e_get_flags(l2e) & disallowed_flags) )
>-        return 0;
>-    if ( l2e_get_flags(l2e) & _PAGE_PSE )
>-        return 1;
>+        return real_page_fault;
>+
>+    u_flag &= l2e_get_flags(l2e);
>+
>+    if ( l2e_get_flags(l2e) & _PAGE_PSE ) {
>+        /* SMEP fault error code 10001b */
>+        if ( (error_code & PFEC_insn_fetch) &&
>+             !(error_code & PFEC_user_mode) &&
>+             cpu_has_smep && u_flag )
>+            return smep_fault;
>+
>+        return spurious_fault;
>+    }
 >
>l1t = map_domain_page(mfn);
>l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
>@@ -1223,26 +1253,37 @@ static int __spurious_page_fault(
>unmap_domain_page(l1t);
>if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
>(l1e_get_flags(l1e) & disallowed_flags) )
>-        return 0;
>-
>-    return 1;
>+        return real_page_fault;
>+
>+    u_flag &= l1e_get_flags(l1e);
>+
>+    /* SMEP fault error code 10001b */
>+    if ( (error_code & PFEC_insn_fetch) &&
>+         !(error_code & PFEC_user_mode) &&
>+         cpu_has_smep && u_flag )
>+        return smep_fault;
>+
>+    return spurious_fault;
>}
 >
>static int spurious_page_fault(
>unsigned long addr, unsigned int error_code)
>{
>unsigned long flags;
>-    int           is_spurious;
>+    enum pf_type  pf_type;
 >
>/*
>* Disabling interrupts prevents TLB flushing, and hence prevents
>* page tables from becoming invalid under our feet during the walk.
>*/
>local_irq_save(flags);
>-    is_spurious = __spurious_page_fault(addr, error_code);
>+    pf_type = __page_fault_type(addr, error_code);
>local_irq_restore(flags);
 >
>-    return is_spurious;
>+    if ( pf_type == smep_fault )
>+        domain_crash_synchronous();

I don't think you can use domain_crash_synchronous() here; domain_crash()
is perhaps the only possibility (and even if it were possible, I think there
was agreement not to add new calls to the former function and always to
use the latter instead).

That said I'm not certain it's a good choice at all, as Keir had already
pointed out on the first patch version.

Finally, spurious_page_fault() has two call sites, only one of which is
for faults that occurred in hypervisor context (which you seem to
assume is always the case). For the one where it gets called for a
fault in guest context, using domain_crash() is probably correct, but a
(possibly guest-chosen) alternative would be to forward the
(non-spurious) fault to the guest. Again, I think Keir hinted in that
direction already in response to the first patch version.
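
Read as code, that direction might look like the sketch below -- the pf_type
enum matches the patch, while the wrapper's behaviour at the call sites is
hypothetical:

static enum pf_type page_fault_type(unsigned long addr, unsigned int ec)
{
    unsigned long flags;
    enum pf_type type;

    /* Same IRQ discipline as the patch: no TLB flushes during the walk. */
    local_irq_save(flags);
    type = __page_fault_type(addr, ec);
    local_irq_restore(flags);

    return type;
}

/* Guest-context call site: apply policy here rather than in the walker. */
if ( page_fault_type(addr, error_code) == smep_fault )
    domain_crash(current->domain);   /* or forward the #PF to the guest */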

>+
>+    return (pf_type == spurious_fault);
>}
 >
>static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)

Jan


* Re: [PATCH v2] Enable SMEP CPU feature support for XEN hypervisor
@ 2011-06-05  7:36 Jan Beulich
  2011-06-05  8:39 ` Li, Xin
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Beulich @ 2011-06-05  7:36 UTC (permalink / raw)
  To: xin.li, keir; +Cc: xen-devel


>>> Keir Fraser  06/04/11 5:32 PM >>>
>I mean, I know we may as well just hide the feature from PV 64b guests
>totally. That's obvious. Let's stop talking about PV 64b guests already! The
>question is: what to do about PV 32b guests?

Quite obviously we ought to allow 32-bit pv guests to control this for
themselves (and hence see the feature).
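
In code, that policy might look like the hypothetical sketch below: 64-bit pv
kernels run at CPL 3, where SMEP cannot usefully be exposed to them, while
32-bit pv kernels run at CPL 1. The helper name and placement are
illustrative, not Xen's actual CPUID plumbing.

static void pv_filter_leaf7_ebx(const struct domain *d, uint32_t *ebx)
{
    /* Hide CPUID.(7,0):EBX.SMEP[bit 7] from 64-bit pv guests only. */
    if ( !is_pv_32bit_domain(d) )
        *ebx &= ~(1u << 7);
}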

Besides that, assuming Xin verified it's working, your latest patch
looks great to me.

Jan



Thread overview: 14+ messages
2011-06-03 13:57 [PATCH v2] Enable SMEP CPU feature support for XEN hypervisor Li, Xin
2011-06-03 17:23 ` Keir Fraser
2011-06-03 18:49   ` Li, Xin
2011-06-03 19:22     ` Keir Fraser
2011-06-03 19:37       ` Li, Xin
2011-06-03 20:15         ` Keir Fraser
2011-06-03 14:36 Jan Beulich
2011-06-03 16:18 ` Li, Xin
2011-06-03 17:09   ` Li, Xin
2011-06-05  7:36 Jan Beulich
2011-06-05  8:39 ` Li, Xin
2011-06-05 15:10   ` Keir Fraser
2011-06-05 15:43     ` Li, Xin
2011-06-05 17:04       ` Keir Fraser
