* [PATCH 0/4] x86: XSA-67 follow-up
@ 2013-10-10 13:49 Jan Beulich
  2013-10-10 13:53 ` [PATCH 1/4] x86: correct LDT checks Jan Beulich
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Jan Beulich @ 2013-10-10 13:49 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser

1: correct LDT checks
2: add address validity check to guest_map_l1e()
3: use {rd,wr}{fs,gs}base when available
4: check for canonical address before doing page walks

Signed-off-by: Jan Beulich <jbeulich@suse.com>


* [PATCH 1/4] x86: correct LDT checks
  2013-10-10 13:49 [PATCH 0/4] x86: XSA-67 follow-up Jan Beulich
@ 2013-10-10 13:53 ` Jan Beulich
  2013-10-10 13:54 ` [PATCH 2/4] x86: add address validity check to guest_map_l1e() Jan Beulich
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2013-10-10 13:53 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

- MMUEXT_SET_LDT should behave as similarly to the LLDT instruction as
  possible: fail only if the base address is non-canonical
- instead, LDT descriptor accesses should fault if the descriptor
  address ends up being non-canonical (ensuring this also keeps us from
  reading an entry from the mach-to-phys table and treating it as a
  page table entry)
- fault propagation when using LDT selectors must distinguish #PF from
  #GP (the latter must be raised for a non-canonical descriptor
  address, which also applies to several other uses of
  propagate_page_fault(), and hence the problem is fixed there)
- map_ldt_shadow_page() should properly wrap addresses for 32-bit VMs

While at it, remove the odd invocation of map_ldt_shadow_page() from
the MMUEXT_SET_LDT handler: there's nothing really telling us that the
first LDT page is going to be preferred over others.
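
For context, an address is canonical when bits 63..47 are all copies of
bit 47, i.e. it survives sign-extension from bit 47 (on implementations
with 48 address bits, as current hardware provides). A minimal sketch
of such a test, written in C for illustration only and not taken from
the Xen sources, could look like this:

    /* Illustration only -- not Xen's is_canonical_address(). */
    static inline int is_canonical_sketch(unsigned long addr)
    {
        /*
         * Sign-extend from bit 47 and compare with a full sign
         * extension: the two agree exactly when bits 63..47 are all
         * equal to bit 47.
         */
        return ((long)addr >> 47) == ((long)addr >> 63);
    }

Hardware raises #GP (not #PF) for non-canonical data references in
64-bit mode, which is why the descriptor-access and fault-propagation
changes below insist on delivering #GP for such addresses.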

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
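
A note on the error code used in the handle_gdt_ldt_mapping_fault()
change below: the architectural selector error code carries the
selector index in bits 15..3, TI (1 = LDT) in bit 2, IDT in bit 1 and
EXT in bit 0. Since the faulting byte offset into the virtual LDT area
is selector_index * 8 plus the byte within the descriptor, the
expression ((u16)offset & ~3) | 4 reproduces that layout. A
stand-alone sketch, for illustration only:

    /* Illustration only -- mirrors the expression used in the patch. */
    static inline unsigned int ldt_fault_error_code(unsigned int offset)
    {
        /*
         * Keep the selector index bits (15..3) in place and set TI
         * (bit 2) to mark an LDT reference; IDT and EXT stay clear.
         */
        return (offset & ~3u) | 4u;
    }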

--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -674,12 +674,7 @@ int arch_set_info_guest(
                 fixup_guest_code_selector(d, c.nat->trap_ctxt[i].cs);
             }
 
-            /* LDT safety checks. */
-            if ( ((c.nat->ldt_base & (PAGE_SIZE-1)) != 0) ||
-                 (c.nat->ldt_ents > 8192) ||
-                 !array_access_ok(c.nat->ldt_base,
-                                  c.nat->ldt_ents,
-                                  LDT_ENTRY_SIZE) )
+            if ( !__addr_ok(c.nat->ldt_base) )
                 return -EINVAL;
         }
         else
@@ -692,15 +687,12 @@ int arch_set_info_guest(
 
             for ( i = 0; i < ARRAY_SIZE(c.cmp->trap_ctxt); i++ )
                 fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs);
-
-            /* LDT safety checks. */
-            if ( ((c.cmp->ldt_base & (PAGE_SIZE-1)) != 0) ||
-                 (c.cmp->ldt_ents > 8192) ||
-                 !compat_array_access_ok(c.cmp->ldt_base,
-                                         c.cmp->ldt_ents,
-                                         LDT_ENTRY_SIZE) )
-                return -EINVAL;
         }
+
+        /* LDT safety checks. */
+        if ( ((c(ldt_base) & (PAGE_SIZE - 1)) != 0) ||
+             (c(ldt_ents) > 8192) )
+            return -EINVAL;
     }
 
     v->fpu_initialised = !!(flags & VGCF_I387_VALID);
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -582,6 +582,8 @@ int map_ldt_shadow_page(unsigned int off
 
     BUG_ON(unlikely(in_irq()));
 
+    if ( is_pv_32bit_domain(d) )
+        gva = (u32)gva;
     guest_get_eff_kern_l1e(v, gva, &l1e);
     if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
         return 0;
@@ -3229,9 +3231,8 @@ long do_mmuext_op(
                 MEM_LOG("ignoring SET_LDT hypercall from external domain");
                 okay = 0;
             }
-            else if ( ((ptr & (PAGE_SIZE-1)) != 0) || 
-                      (ents > 8192) ||
-                      !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
+            else if ( ((ptr & (PAGE_SIZE - 1)) != 0) || !__addr_ok(ptr) ||
+                      (ents > 8192) )
             {
                 okay = 0;
                 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
@@ -3244,8 +3245,6 @@ long do_mmuext_op(
                 curr->arch.pv_vcpu.ldt_base = ptr;
                 curr->arch.pv_vcpu.ldt_ents = ents;
                 load_LDT(curr);
-                if ( ents != 0 )
-                    (void)map_ldt_shadow_page(0);
             }
             break;
         }
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1065,12 +1065,24 @@ static void reserved_bit_page_fault(
     show_execution_state(regs);
 }
 
-void propagate_page_fault(unsigned long addr, u16 error_code)
+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code)
 {
     struct trap_info *ti;
     struct vcpu *v = current;
     struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce;
 
+    if ( unlikely(!is_canonical_address(addr)) )
+    {
+        ti = &v->arch.pv_vcpu.trap_ctxt[TRAP_gp_fault];
+        tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
+        tb->error_code = 0;
+        tb->cs         = ti->cs;
+        tb->eip        = ti->address;
+        if ( TI_GET_IF(ti) )
+            tb->flags |= TBF_INTERRUPT;
+        return tb;
+    }
+
     v->arch.pv_vcpu.ctrlreg[2] = addr;
     arch_set_cr2(v, addr);
 
@@ -1097,6 +1109,8 @@ void propagate_page_fault(unsigned long 
 
     if ( unlikely(error_code & PFEC_reserved_bit) )
         reserved_bit_page_fault(addr, guest_cpu_user_regs());
+
+    return NULL;
 }
 
 static int handle_gdt_ldt_mapping_fault(
@@ -1130,13 +1144,16 @@ static int handle_gdt_ldt_mapping_fault(
         }
         else
         {
+            struct trap_bounce *tb;
+
             /* In hypervisor mode? Leave it to the #PF handler to fix up. */
             if ( !guest_mode(regs) )
                 return 0;
-            /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
-            propagate_page_fault(
-                curr->arch.pv_vcpu.ldt_base + offset,
-                regs->error_code);
+            /* In guest mode? Propagate fault to guest, with adjusted %cr2. */
+            tb = propagate_page_fault(curr->arch.pv_vcpu.ldt_base + offset,
+                                      regs->error_code);
+            if ( tb )
+                tb->error_code = ((u16)offset & ~3) | 4;
         }
     }
     else
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -555,7 +555,7 @@ int new_guest_cr3(unsigned long pfn);
 void make_cr3(struct vcpu *v, unsigned long mfn);
 void update_cr3(struct vcpu *v);
 int vcpu_destroy_pagetables(struct vcpu *);
-void propagate_page_fault(unsigned long addr, u16 error_code);
+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code);
 void *do_page_walk(struct vcpu *v, unsigned long addr);
 
 int __sync_local_execstate(void);
--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -386,7 +386,8 @@ guest_get_eff_l1e(struct vcpu *v, unsign
     if ( likely(!paging_mode_translate(v->domain)) )
     {
         ASSERT(!paging_mode_external(v->domain));
-        if ( __copy_from_user(eff_l1e, 
+        if ( !__addr_ok(addr) ||
+             __copy_from_user(eff_l1e,
                               &__linear_l1_table[l1_linear_offset(addr)],
                               sizeof(l1_pgentry_t)) != 0 )
             *(l1_pgentry_t *)eff_l1e = l1e_empty();




* [PATCH 2/4] x86: add address validity check to guest_map_l1e()
  2013-10-10 13:49 [PATCH 0/4] x86: XSA-67 follow-up Jan Beulich
  2013-10-10 13:53 ` [PATCH 1/4] x86: correct LDT checks Jan Beulich
@ 2013-10-10 13:54 ` Jan Beulich
  2013-10-10 13:57   ` Andrew Cooper
  2013-10-10 13:54 ` [PATCH 3/4] x86: use {rd, wr}{fs, gs}base when available Jan Beulich
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2013-10-10 13:54 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser

Just like for guest_get_eff_l1e(), this prevents internal data inside
Xen that happens to be mapped with 1Gb pages from being accessed as
page tables (and with the wrong memory attribute).

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/include/asm-x86/paging.h
+++ b/xen/include/asm-x86/paging.h
@@ -360,7 +360,8 @@ guest_map_l1e(struct vcpu *v, unsigned l
         return paging_get_hostmode(v)->guest_map_l1e(v, addr, gl1mfn);
 
     /* Find this l1e and its enclosing l1mfn in the linear map */
-    if ( __copy_from_user(&l2e, 
+    if ( !__addr_ok(addr) ||
+         __copy_from_user(&l2e,
                           &__linear_l2_table[l2_linear_offset(addr)],
                           sizeof(l2_pgentry_t)) != 0 )
         return NULL;





* [PATCH 3/4] x86: use {rd, wr}{fs, gs}base when available
  2013-10-10 13:49 [PATCH 0/4] x86: XSA-67 follow-up Jan Beulich
  2013-10-10 13:53 ` [PATCH 1/4] x86: correct LDT checks Jan Beulich
  2013-10-10 13:54 ` [PATCH 2/4] x86: add address validity check to guest_map_l1e() Jan Beulich
@ 2013-10-10 13:54 ` Jan Beulich
  2013-10-10 14:15   ` Andrew Cooper
  2013-10-10 13:55 ` [PATCH 4/4] x86: check for canonical address before doing page walks Jan Beulich
  2013-10-10 15:13 ` [PATCH 0/4] x86: XSA-67 follow-up Keir Fraser
  4 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2013-10-10 13:54 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser

... as these are intended to be faster than MSR reads/writes.

In the case of emulate_privileged_op(), also use these instead of the
cached (but possibly stale) addresses from arch.pv_vcpu. This allows
entirely removing the code that was the subject of XSA-67.
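
For reference, availability of these instructions is advertised via
CPUID leaf 7 (sub-leaf 0), EBX bit 0, and they additionally require
CR4.FSGSBASE (bit 16) to be set -- which is what the setup.c hunk below
takes care of at boot. A stand-alone detection sketch using GCC's
<cpuid.h> helpers (illustration only; Xen uses its own cpufeature
machinery for cpu_has_fsgsbase):

    #include <cpuid.h>
    #include <stdbool.h>

    /* Illustration only -- not how cpu_has_fsgsbase is implemented. */
    static bool cpu_supports_fsgsbase(void)
    {
        unsigned int sig, eax = 0, ebx = 0, ecx = 0, edx = 0;

        if ( __get_cpuid_max(0, &sig) < 7 )
            return false;
        /* CPUID.(EAX=7, ECX=0):EBX[bit 0] advertises RD/WR{FS,GS}BASE. */
        __cpuid_count(7, 0, eax, ebx, ecx, edx);
        return ebx & 1;
    }

Note that the rd*/wr* helpers added to msr.h only depend on
cpu_has_fsgsbase; the CR4 bit is enabled once at boot so that the
instructions don't #UD whenever the feature is present.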

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/Rules.mk
+++ b/xen/arch/x86/Rules.mk
@@ -31,6 +31,7 @@ $(call cc-options-add,CFLAGS,CC,$(EMBEDD
 $(call cc-option-add,CFLAGS,CC,-Wnested-externs)
 $(call as-insn-check,CFLAGS,CC,"vmcall",-DHAVE_GAS_VMX)
 $(call as-insn-check,CFLAGS,CC,"invept (%rax)$$(comma)%rax",-DHAVE_GAS_EPT)
+$(call as-insn-check,CFLAGS,CC,"rdfsbase %rax",-DHAVE_GAS_FSGSBASE)
 
 ifeq ($(supervisor_mode_kernel),y)
 CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
--- a/xen/arch/x86/acpi/suspend.c
+++ b/xen/arch/x86/acpi/suspend.c
@@ -27,8 +27,8 @@ void save_rest_processor_state(void)
     asm volatile (
         "movw %%ds,(%0); movw %%es,2(%0); movw %%fs,4(%0); movw %%gs,6(%0)"
         : : "r" (saved_segs) : "memory" );
-    rdmsrl(MSR_FS_BASE, saved_fs_base);
-    rdmsrl(MSR_GS_BASE, saved_gs_base);
+    saved_fs_base = rdfsbase();
+    saved_gs_base = rdgsbase();
     rdmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base);
     rdmsrl(MSR_CSTAR, saved_cstar);
     rdmsrl(MSR_LSTAR, saved_lstar);
@@ -56,8 +56,8 @@ void restore_rest_processor_state(void)
           X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_TF,
           0U);
 
-    wrmsrl(MSR_FS_BASE, saved_fs_base);
-    wrmsrl(MSR_GS_BASE, saved_gs_base);
+    wrfsbase(saved_fs_base);
+    wrgsbase(saved_gs_base);
     wrmsrl(MSR_SHADOW_GS_BASE, saved_kernel_gs_base);
 
     if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1092,7 +1092,7 @@ static void load_segments(struct vcpu *n
     {
         /* This can only be non-zero if selector is NULL. */
         if ( n->arch.pv_vcpu.fs_base )
-            wrmsrl(MSR_FS_BASE, n->arch.pv_vcpu.fs_base);
+            wrfsbase(n->arch.pv_vcpu.fs_base);
 
         /* Most kernels have non-zero GS base, so don't bother testing. */
         /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
@@ -1100,7 +1100,7 @@ static void load_segments(struct vcpu *n
 
         /* This can only be non-zero if selector is NULL. */
         if ( n->arch.pv_vcpu.gs_base_user )
-            wrmsrl(MSR_GS_BASE, n->arch.pv_vcpu.gs_base_user);
+            wrgsbase(n->arch.pv_vcpu.gs_base_user);
 
         /* If in kernel mode then switch the GS bases around. */
         if ( (n->arch.flags & TF_kernel_mode) )
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1264,6 +1264,9 @@ void __init __start_xen(unsigned long mb
     if ( cpu_has_smep )
         set_in_cr4(X86_CR4_SMEP);
 
+    if ( cpu_has_fsgsbase )
+        set_in_cr4(X86_CR4_FSGSBASE);
+
     local_irq_enable();
 
     pt_pci_init();
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1985,28 +1985,18 @@ static int emulate_privileged_op(struct 
         }
         else
         {
-            if ( lm_ovr == lm_seg_none || data_sel < 4 )
+            switch ( lm_ovr )
             {
-                switch ( lm_ovr )
-                {
-                case lm_seg_none:
-                    data_base = 0UL;
-                    break;
-                case lm_seg_fs:
-                    data_base = v->arch.pv_vcpu.fs_base;
-                    break;
-                case lm_seg_gs:
-                    if ( guest_kernel_mode(v, regs) )
-                        data_base = v->arch.pv_vcpu.gs_base_kernel;
-                    else
-                        data_base = v->arch.pv_vcpu.gs_base_user;
-                    break;
-                }
+            default:
+                data_base = 0UL;
+                break;
+            case lm_seg_fs:
+                data_base = rdfsbase();
+                break;
+            case lm_seg_gs:
+                data_base = rdgsbase();
+                break;
             }
-            else if ( !read_descriptor(data_sel, v, regs,
-                                       &data_base, &data_limit, &ar, 0) ||
-                      !(ar & _SEGMENT_S) || !(ar & _SEGMENT_P) )
-                goto fail;
             data_limit = ~0UL;
             ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P;
         }
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -457,12 +457,12 @@ unsigned long pv_guest_cr4_fixup(const s
     (((v)->arch.pv_vcpu.ctrlreg[4]                          \
       | (mmu_cr4_features                                   \
          & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP |      \
-            X86_CR4_OSXSAVE))                               \
+            X86_CR4_OSXSAVE | X86_CR4_FSGSBASE))            \
       | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
      & ~X86_CR4_DE)
 #define real_cr4_to_pv_guest_cr4(c)                         \
-    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD        \
-             | X86_CR4_OSXSAVE | X86_CR4_SMEP))
+    ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |      \
+             X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE))
 
 void domain_cpuid(struct domain *d,
                   unsigned int  input,
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -9,6 +9,7 @@
 #include <xen/percpu.h>
 #include <xen/errno.h>
 #include <asm/asm_defns.h>
+#include <asm/cpufeature.h>
 
 #define rdmsr(msr,val1,val2) \
      __asm__ __volatile__("rdmsr" \
@@ -97,6 +98,61 @@ static inline int wrmsr_safe(unsigned in
 			  : "=a" (low), "=d" (high) \
 			  : "c" (counter))
 
+static inline unsigned long rdfsbase(void)
+{
+    unsigned long base;
+
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "rdfsbase %0" : "=r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc0" : "=a" (base) );
+#endif
+    else
+        rdmsrl(MSR_FS_BASE, base);
+
+    return base;
+}
+
+static inline unsigned long rdgsbase(void)
+{
+    unsigned long base;
+
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "rdgsbase %0" : "=r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xc8" : "=a" (base) );
+#endif
+    else
+        rdmsrl(MSR_GS_BASE, base);
+
+    return base;
+}
+
+static inline void wrfsbase(unsigned long base)
+{
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "wrfsbase %0" :: "r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd0" :: "a" (base) );
+#endif
+    else
+        wrmsrl(MSR_FS_BASE, base);
+}
+
+static inline void wrgsbase(unsigned long base)
+{
+    if ( cpu_has_fsgsbase )
+#ifdef HAVE_GAS_FSGSBASE
+        asm volatile ( "wrgsbase %0" :: "r" (base) );
+#else
+        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8" :: "a" (base) );
+#endif
+    else
+        wrmsrl(MSR_GS_BASE, base);
+}
 
 DECLARE_PER_CPU(u64, efer);
 u64 read_efer(void);




* [PATCH 4/4] x86: check for canonical address before doing page walks
  2013-10-10 13:49 [PATCH 0/4] x86: XSA-67 follow-up Jan Beulich
                   ` (2 preceding siblings ...)
  2013-10-10 13:54 ` [PATCH 3/4] x86: use {rd, wr}{fs, gs}base when available Jan Beulich
@ 2013-10-10 13:55 ` Jan Beulich
  2013-10-10 14:01   ` Andrew Cooper
  2013-10-10 15:13 ` [PATCH 0/4] x86: XSA-67 follow-up Keir Fraser
  4 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2013-10-10 13:55 UTC (permalink / raw)
  To: xen-devel; +Cc: Andrew Cooper, Keir Fraser

... as there doesn't really exist any valid mapping for them.

Particularly in the case of do_page_walk() this also avoids returning
non-NULL for such invalid input.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -73,7 +73,7 @@ void *do_page_walk(struct vcpu *v, unsig
     l2_pgentry_t l2e, *l2t;
     l1_pgentry_t l1e, *l1t;
 
-    if ( is_hvm_vcpu(v) )
+    if ( is_hvm_vcpu(v) || !is_canonical_address(addr) )
         return NULL;
 
     l4t = map_domain_page(mfn);
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -170,6 +170,8 @@ void show_page_walk(unsigned long addr)
     l1_pgentry_t l1e, *l1t;
 
     printk("Pagetable walk from %016lx:\n", addr);
+    if ( !is_canonical_address(addr) )
+        return;
 
     l4t = map_domain_page(mfn);
     l4e = l4t[l4_table_offset(addr)];





* Re: [PATCH 2/4] x86: add address validity check to guest_map_l1e()
  2013-10-10 13:54 ` [PATCH 2/4] x86: add address validity check to guest_map_l1e() Jan Beulich
@ 2013-10-10 13:57   ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2013-10-10 13:57 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser


On 10/10/13 14:54, Jan Beulich wrote:
> Just like for guest_get_eff_l1e() this prevents accessing as page
> tables (and with the wrong memory attribute) internal data inside Xen
> happening to be mapped with 1Gb pages.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
>
> --- a/xen/include/asm-x86/paging.h
> +++ b/xen/include/asm-x86/paging.h
> @@ -360,7 +360,8 @@ guest_map_l1e(struct vcpu *v, unsigned l
>          return paging_get_hostmode(v)->guest_map_l1e(v, addr, gl1mfn);
>  
>      /* Find this l1e and its enclosing l1mfn in the linear map */
> -    if ( __copy_from_user(&l2e, 
> +    if ( !__addr_ok(addr) ||
> +         __copy_from_user(&l2e,
>                            &__linear_l2_table[l2_linear_offset(addr)],
>                            sizeof(l2_pgentry_t)) != 0 )
>          return NULL;



* Re: [PATCH 4/4] x86: check for canonical address before doing page walks
  2013-10-10 13:55 ` [PATCH 4/4] x86: check for canonical address before doing page walks Jan Beulich
@ 2013-10-10 14:01   ` Andrew Cooper
  2013-10-10 14:10     ` Jan Beulich
  0 siblings, 1 reply; 11+ messages in thread
From: Andrew Cooper @ 2013-10-10 14:01 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser

On 10/10/13 14:55, Jan Beulich wrote:
> ... as there doesn't really exists any valid mapping for them.
>
> Particularly in the case of do_page_walk() this also avoids returning
> non-NULL for such invalid input.
>
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/x86_64/mm.c
> +++ b/xen/arch/x86/x86_64/mm.c
> @@ -73,7 +73,7 @@ void *do_page_walk(struct vcpu *v, unsig
>      l2_pgentry_t l2e, *l2t;
>      l1_pgentry_t l1e, *l1t;
>  
> -    if ( is_hvm_vcpu(v) )
> +    if ( is_hvm_vcpu(v) || !is_canonical_address(addr) )
>          return NULL;
>  
>      l4t = map_domain_page(mfn);
> --- a/xen/arch/x86/x86_64/traps.c
> +++ b/xen/arch/x86/x86_64/traps.c
> @@ -170,6 +170,8 @@ void show_page_walk(unsigned long addr)
>      l1_pgentry_t l1e, *l1t;
>  
>      printk("Pagetable walk from %016lx:\n", addr);
> +    if ( !is_canonical_address(addr) )
> +        return;
>  
>      l4t = map_domain_page(mfn);
>      l4e = l4t[l4_table_offset(addr)];
>

I was intending something a little more like

if ( !is_canonical_address(addr) )
{
    printk(" Not canonical\n");
    return;
}

So it doesn't appear on the console that show_page_walk() got stuck
somewhere.

~Andrew


* Re: [PATCH 4/4] x86: check for canonical address before doing page walks
  2013-10-10 14:01   ` Andrew Cooper
@ 2013-10-10 14:10     ` Jan Beulich
  2013-10-10 14:17       ` Andrew Cooper
  0 siblings, 1 reply; 11+ messages in thread
From: Jan Beulich @ 2013-10-10 14:10 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: xen-devel, Keir Fraser

>>> On 10.10.13 at 16:01, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> On 10/10/13 14:55, Jan Beulich wrote:
>> --- a/xen/arch/x86/x86_64/traps.c
>> +++ b/xen/arch/x86/x86_64/traps.c
>> @@ -170,6 +170,8 @@ void show_page_walk(unsigned long addr)
>>      l1_pgentry_t l1e, *l1t;
>>  
>>      printk("Pagetable walk from %016lx:\n", addr);
>> +    if ( !is_canonical_address(addr) )
>> +        return;
>>  
>>      l4t = map_domain_page(mfn);
>>      l4e = l4t[l4_table_offset(addr)];
>>
> 
> I was intending something a little more like
> 
> if ( !is_canonical_address(addr) )
> {
>     printk(" Not canonical\n");
>     return;
> }
> 
> So it doesn't appear on the console that show_page_walk() got stuck
> somewhere.

Seems pretty pointless - the address is being printed, and anyone
(I would hope) can easily see whether it is non-canonical.

Jan


* Re: [PATCH 3/4] x86: use {rd, wr}{fs, gs}base when available
  2013-10-10 13:54 ` [PATCH 3/4] x86: use {rd, wr}{fs, gs}base when available Jan Beulich
@ 2013-10-10 14:15   ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2013-10-10 14:15 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser


On 10/10/13 14:54, Jan Beulich wrote:
> ... as being intended to be faster than MSR reads/writes.
>
> In the case of emulate_privileged_op() also use these in favor of the
> cached (but possibly stale) addresses from arch.pv_vcpu. This allows
> entirely removing the code that was the subject of XSA-67.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH 4/4] x86: check for canonical address before doing page walks
  2013-10-10 14:10     ` Jan Beulich
@ 2013-10-10 14:17       ` Andrew Cooper
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Cooper @ 2013-10-10 14:17 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel, Keir Fraser

On 10/10/13 15:10, Jan Beulich wrote:
>>>> On 10.10.13 at 16:01, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 10/10/13 14:55, Jan Beulich wrote:
>>> --- a/xen/arch/x86/x86_64/traps.c
>>> +++ b/xen/arch/x86/x86_64/traps.c
>>> @@ -170,6 +170,8 @@ void show_page_walk(unsigned long addr)
>>>      l1_pgentry_t l1e, *l1t;
>>>  
>>>      printk("Pagetable walk from %016lx:\n", addr);
>>> +    if ( !is_canonical_address(addr) )
>>> +        return;
>>>  
>>>      l4t = map_domain_page(mfn);
>>>      l4e = l4t[l4_table_offset(addr)];
>>>
>> I was intending something a little more like
>>
>> if ( !is_canonical_address(addr) )
>> {
>>     printk(" Not canonical\n");
>>     return;
>> }
>>
>> So it doesn't appear on the console that show_page_walk() got stuck
>> somewhere.
> Seems pretty pointless - the address is being printed, and anyone
> (I would hope) can easily see whether it is non-canonical.
>
> Jan
>

True I suppose, although it would start getting harder if/when the
non-canonical section shrinks in size.

I would prefer the printk(), but am not overly fussed and the other fix
is certainly good, so

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH 0/4] x86: XSA-67 follow-up
  2013-10-10 13:49 [PATCH 0/4] x86: XSA-67 follow-up Jan Beulich
                   ` (3 preceding siblings ...)
  2013-10-10 13:55 ` [PATCH 4/4] x86: check for canonical address before doing page walks Jan Beulich
@ 2013-10-10 15:13 ` Keir Fraser
  4 siblings, 0 replies; 11+ messages in thread
From: Keir Fraser @ 2013-10-10 15:13 UTC (permalink / raw)
  To: Jan Beulich, xen-devel

On 10/10/2013 14:49, "Jan Beulich" <JBeulich@suse.com> wrote:

> 1: correct LDT checks
> 2: add address validity check to guest_map_l1e()
> 3: use {rd,wr}{fs,gs}base when available
> 4: check for canonical address before doing page walks
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

All of them.

Acked-by: Keir Fraser <keir@xen.org>

