* [PATCH v3 0/6] x86: Meltdown band-aid overhead reduction
From: Jan Beulich @ 2018-03-13 13:37 UTC (permalink / raw)
  To: xen-devel; +Cc: Juergen Gross, Andrew Cooper

1: NOP out XPTI entry/exit code when it's not in use
2: disable XPTI when RDCL_NO
3: log XPTI enabled status
4: use %r12 to write zero into xen_cr3
5: reduce .text.entry
6: avoid double CR3 reload when switching to guest user mode

Signed-off-by: Jan Beulich <jbeulich@suse.com>




* [PATCH v3 1/6] x86: NOP out XPTI entry/exit code when it's not in use
From: Jan Beulich @ 2018-03-13 13:47 UTC (permalink / raw)
  To: xen-devel; +Cc: Juergen Gross, Andrew Cooper

Introduce a synthetic feature flag to use alternative instruction
patching to NOP out all XPTI code on entry/exit paths. Having NOPs here is
generally better than using conditional branches.

Also change the limit on the number of bytes we can patch in one go to
that resulting from the encoding in struct alt_instr - there's no point
reducing it below that limit, and without a check being in place that
the limit isn't actually exceeded, such an artificial boundary is a
latent risk.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
---
v3: Also patch NMI/#MC paths. Re-base.
v2: Introduce and use ALTERNATIVE_NOP. Re-base.
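
For readers unfamiliar with the mechanism, the two idioms this patch
relies on look roughly like this (a simplified sketch with hypothetical
label names, not part of the patch itself; see the hunks below for the
real uses):

        /* Patched to an sti at boot when X86_FEATURE_NO_XPTI is set,
         * so the interrupt-enable point moves without any runtime
         * conditional branch: */
        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI

        /* The new ALTERNATIVE_NOP overwrites the whole start...end
         * range with NOPs when the feature bit is set: */
.Lxpti_cr3_start:
        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
        /* ... CR3 switching ... */
.Lxpti_cr3_end:
        ALTERNATIVE_NOP .Lxpti_cr3_start, .Lxpti_cr3_end, X86_FEATURE_NO_XPTI

The 255-byte value is the natural ceiling because struct alt_instr
stores the original and replacement lengths in single-byte fields -
layout sketched from memory here, so treat the exact field names as an
assumption and consult xen/include/asm-x86/alternative.h for the real
definition:

    struct alt_instr {
        int32_t  orig_offset;   /* original instruction */
        int32_t  repl_offset;   /* offset to replacement instruction */
        uint16_t cpuid;         /* cpuid bit set for replacement */
        uint8_t  orig_len;      /* length of original instruction */
        uint8_t  repl_len;      /* length of replacement instruction */
    };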

--- a/xen/arch/x86/alternative.c
+++ b/xen/arch/x86/alternative.c
@@ -26,7 +26,7 @@
 #include <asm/nmi.h>
 #include <xen/livepatch.h>
 
-#define MAX_PATCH_LEN (255-1)
+#define MAX_PATCH_LEN 255
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3709,7 +3709,7 @@ long do_mmu_update(
                      * to the page lock we hold, its pinned status, and uses on
                      * this (v)CPU.
                      */
-                    if ( !rc && this_cpu(root_pgt) &&
+                    if ( !rc && !cpu_has_no_xpti &&
                          ((page->u.inuse.type_info & PGT_count_mask) >
                           (1 + !!(page->u.inuse.type_info & PGT_pinned) +
                            (pagetable_get_pfn(curr->arch.guest_table) == mfn) +
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -169,6 +169,9 @@ static int __init parse_smap_param(const
 }
 custom_param("smap", parse_smap_param);
 
+static int8_t __initdata opt_xpti = -1;
+boolean_param("xpti", opt_xpti);
+
 bool __read_mostly acpi_disabled;
 bool __initdata acpi_force;
 static char __initdata acpi_param[10] = "";
@@ -1541,6 +1544,13 @@ void __init noreturn __start_xen(unsigne
 
     cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS;
 
+    if ( opt_xpti < 0 )
+        opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
+    if ( opt_xpti )
+        setup_clear_cpu_cap(X86_FEATURE_NO_XPTI);
+    else
+        setup_force_cpu_cap(X86_FEATURE_NO_XPTI);
+
     if ( cpu_has_fsgsbase )
         set_in_cr4(X86_CR4_FSGSBASE);
 
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -752,8 +752,6 @@ static int clone_mapping(const void *ptr
     return 0;
 }
 
-static __read_mostly int8_t opt_xpti = -1;
-boolean_param("xpti", opt_xpti);
 DEFINE_PER_CPU(root_pgentry_t *, root_pgt);
 
 static root_pgentry_t common_pgt;
@@ -766,7 +764,7 @@ static int setup_cpu_root_pgt(unsigned i
     unsigned int off;
     int rc;
 
-    if ( !opt_xpti )
+    if ( cpu_has_no_xpti )
         return 0;
 
     rpt = alloc_xen_pagetable();
@@ -1047,9 +1045,6 @@ void __init smp_prepare_cpus(unsigned in
 
     stack_base[0] = stack_start;
 
-    if ( opt_xpti < 0 )
-        opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
-
     rc = setup_cpu_root_pgt(0);
     if ( rc )
         panic("Error %d setting up PV root page table\n", rc);
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -194,7 +194,7 @@ ENTRY(compat_post_handle_exception)
 
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         CR4_PV32_RESTORE
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
@@ -209,6 +209,7 @@ ENTRY(cstar_enter)
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         GET_STACK_END(bx)
+.Lcstar_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lcstar_cr3_okay
@@ -218,6 +219,8 @@ ENTRY(cstar_enter)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lcstar_cr3_okay:
         sti
+.Lcstar_cr3_end:
+        ALTERNATIVE_NOP .Lcstar_cr3_start, .Lcstar_cr3_end, X86_FEATURE_NO_XPTI
 
         movq  STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx
         movq  VCPU_domain(%rbx),%rcx
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -45,6 +45,7 @@ restore_all_guest:
         mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d
 
         /* Copy guest mappings and switch to per-CPU root page table. */
+.Lrag_cr3_start:
         mov   VCPU_cr3(%rbx), %r9
         GET_STACK_END(dx)
         mov   STACK_CPUINFO_FIELD(pv_cr3)(%rdx), %rdi
@@ -52,7 +53,6 @@ restore_all_guest:
         movabs $DIRECTMAP_VIRT_START, %rcx
         mov   %rdi, %rax
         and   %rsi, %rdi
-        jz    .Lrag_keep_cr3
         and   %r9, %rsi
         add   %rcx, %rdi
         add   %rcx, %rsi
@@ -74,7 +74,8 @@ restore_all_guest:
         mov   %rdi, %cr4
         mov   %rax, %cr3
         mov   %rsi, %cr4
-.Lrag_keep_cr3:
+.Lrag_cr3_end:
+        ALTERNATIVE_NOP .Lrag_cr3_start, .Lrag_cr3_end, X86_FEATURE_NO_XPTI
 
         /* Restore stashed SPEC_CTRL value. */
         mov   %r15d, %eax
@@ -121,6 +122,7 @@ restore_all_xen:
          * case we return to late PV exit code (from an NMI or #MC).
          */
         GET_STACK_END(bx)
+.Lrax_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rdx
         mov   STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax
         test  %rdx, %rdx
@@ -136,6 +138,8 @@ UNLIKELY_START(g, exit_cr3)
         mov   %rax, %cr3
         mov   %rsi, %cr4
 UNLIKELY_END(exit_cr3)
+.Lrax_cr3_end:
+        ALTERNATIVE_NOP .Lrax_cr3_start, .Lrax_cr3_end, X86_FEATURE_NO_XPTI
 
         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
         SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */
@@ -160,7 +164,7 @@ UNLIKELY_END(exit_cr3)
  * %ss must be saved into the space left by the trampoline.
  */
 ENTRY(lstar_enter)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
         pushq %r11
@@ -174,6 +178,7 @@ ENTRY(lstar_enter)
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         GET_STACK_END(bx)
+.Llstar_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Llstar_cr3_okay
@@ -183,6 +188,8 @@ ENTRY(lstar_enter)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Llstar_cr3_okay:
         sti
+.Llstar_cr3_end:
+        ALTERNATIVE_NOP .Llstar_cr3_start, .Llstar_cr3_end, X86_FEATURE_NO_XPTI
 
         movq  STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx
         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
@@ -265,7 +272,7 @@ process_trap:
         jmp  test_all_events
 
 ENTRY(sysenter_entry)
-        /* sti could live here when we don't switch page tables below. */
+        ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         pushq $FLAT_USER_SS
         pushq $0
         pushfq
@@ -281,6 +288,7 @@ GLOBAL(sysenter_eflags_saved)
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         GET_STACK_END(bx)
+.Lsyse_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lsyse_cr3_okay
@@ -290,6 +298,8 @@ GLOBAL(sysenter_eflags_saved)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lsyse_cr3_okay:
         sti
+.Lsyse_cr3_end:
+        ALTERNATIVE_NOP .Lsyse_cr3_start, .Lsyse_cr3_end, X86_FEATURE_NO_XPTI
 
         movq  STACK_CPUINFO_FIELD(current_vcpu)(%rbx), %rbx
         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
@@ -331,6 +341,7 @@ ENTRY(int80_direct_trap)
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         GET_STACK_END(bx)
+.Lint80_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
         neg   %rcx
         jz    .Lint80_cr3_okay
@@ -340,6 +351,8 @@ ENTRY(int80_direct_trap)
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lint80_cr3_okay:
         sti
+.Lint80_cr3_end:
+        ALTERNATIVE_NOP .Lint80_cr3_start, .Lint80_cr3_end, X86_FEATURE_NO_XPTI
 
         cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
@@ -539,6 +552,7 @@ ENTRY(common_interrupt)
         SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
+.Lintr_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
         neg   %rcx
@@ -557,9 +571,14 @@ ENTRY(common_interrupt)
         CR4_PV32_RESTORE
         movq %rsp,%rdi
         callq do_IRQ
+.Lintr_cr3_restore:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.Lintr_cr3_end:
         jmp ret_from_intr
 
+        ALTERNATIVE_NOP .Lintr_cr3_restore, .Lintr_cr3_end, X86_FEATURE_NO_XPTI
+        ALTERNATIVE_NOP .Lintr_cr3_start, .Lintr_cr3_okay, X86_FEATURE_NO_XPTI
+
 /* No special register assumptions. */
 ENTRY(ret_from_intr)
         GET_CURRENT(bx)
@@ -581,6 +600,7 @@ GLOBAL(handle_exception)
         SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
+.Lxcpt_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
         neg   %rcx
@@ -647,7 +667,9 @@ handle_exception_saved:
         PERFC_INCR(exceptions, %rax, %rbx)
         mov   (%rdx, %rax, 8), %rdx
         INDIRECT_CALL %rdx
+.Lxcpt_cr3_restore1:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.Lxcpt_cr3_end1:
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
         leaq  VCPU_trap_bounce(%rbx),%rdx
@@ -680,9 +702,17 @@ exception_with_ints_disabled:
         rep;  movsq                     # make room for ec/ev
 1:      movq  UREGS_error_code(%rsp),%rax # ec/ev
         movq  %rax,UREGS_kernel_sizeof(%rsp)
+.Lxcpt_cr3_restore2:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.Lxcpt_cr3_end2:
         jmp   restore_all_xen           # return to fixup code
 
+        ALTERNATIVE_NOP .Lxcpt_cr3_restore1, .Lxcpt_cr3_end1, \
+                        X86_FEATURE_NO_XPTI
+        ALTERNATIVE_NOP .Lxcpt_cr3_restore2, .Lxcpt_cr3_end2, \
+                        X86_FEATURE_NO_XPTI
+        ALTERNATIVE_NOP .Lxcpt_cr3_start, .Lxcpt_cr3_okay, X86_FEATURE_NO_XPTI
+
 /* No special register assumptions. */
 FATAL_exception_with_ints_disabled:
         xorl  %esi,%esi
@@ -798,6 +828,7 @@ handle_ist_exception:
         SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
+.List_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
         mov   %rcx, %r15
         neg   %rcx
@@ -828,10 +859,15 @@ handle_ist_exception:
         leaq  exception_table(%rip),%rdx
         mov   (%rdx, %rax, 8), %rdx
         INDIRECT_CALL %rdx
+.List_cr3_restore:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+.List_cr3_end:
         cmpb  $TRAP_nmi,UREGS_entry_vector(%rsp)
         jne   ret_from_intr
 
+        ALTERNATIVE_NOP .List_cr3_restore, .List_cr3_end, X86_FEATURE_NO_XPTI
+        ALTERNATIVE_NOP .List_cr3_start, .List_cr3_okay, X86_FEATURE_NO_XPTI
+
         /* We want to get straight to the IRET on the NMI exit path. */
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
--- a/xen/include/asm-x86/alternative-asm.h
+++ b/xen/include/asm-x86/alternative-asm.h
@@ -101,6 +101,13 @@
 #undef decl_orig
 #undef as_true
 
+/* Macro to replace an entire range by suitable NOPs. */
+.macro ALTERNATIVE_NOP start, end, feature
+    .pushsection .altinstructions, "a", @progbits
+    altinstruction_entry \start, \start, \feature, "\end - \start", 0, 0
+    .popsection
+.endm
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_X86_ALTERNATIVE_ASM_H_ */
 
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -110,6 +110,7 @@
 #define cpu_has_cpuid_faulting  boot_cpu_has(X86_FEATURE_CPUID_FAULTING)
 #define cpu_has_aperfmperf      boot_cpu_has(X86_FEATURE_APERFMPERF)
 #define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH)
+#define cpu_has_no_xpti         boot_cpu_has(X86_FEATURE_NO_XPTI)
 
 enum _cache_type {
     CACHE_TYPE_NULL = 0,
--- a/xen/include/asm-x86/cpufeatures.h
+++ b/xen/include/asm-x86/cpufeatures.h
@@ -29,4 +29,5 @@ XEN_CPUFEATURE(XEN_IBPB,        (FSCAPIN
 XEN_CPUFEATURE(XEN_IBRS_SET,    (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in Xen */
 XEN_CPUFEATURE(XEN_IBRS_CLEAR,  (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */
 XEN_CPUFEATURE(RSB_NATIVE,      (FSCAPINTS+0)*32+18) /* RSB overwrite needed for native */
-XEN_CPUFEATURE(RSB_VMEXIT,      (FSCAPINTS+0)*32+20) /* RSB overwrite needed for vmexit */
+XEN_CPUFEATURE(RSB_VMEXIT,      (FSCAPINTS+0)*32+19) /* RSB overwrite needed for vmexit */
+XEN_CPUFEATURE(NO_XPTI,         (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */




* [PATCH v3 2/6] x86: disable XPTI when RDCL_NO
From: Jan Beulich @ 2018-03-13 13:48 UTC (permalink / raw)
  To: xen-devel; +Cc: Juergen Gross, Andrew Cooper

Use the respective ARCH_CAPABILITIES MSR bit (RDCL_NO, i.e. the CPU is
not vulnerable to Meltdown), but don't expose the MSR to guests yet.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
---
v3: Re-base.
v2: New.
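
The default computation this patch puts in place can be summarized as
follows (a condensed restatement of the setup.c hunk below):

    uint64_t caps = 0;

    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
        caps = ARCH_CAPABILITIES_RDCL_NO;    /* AMD: not Meltdown-vulnerable */
    else if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
        rdmsrl(MSR_ARCH_CAPABILITIES, caps); /* otherwise: ask the hardware */

    /* XPTI is only needed when the CPU is susceptible (RDCL_NO clear). */
    opt_xpti = !(caps & ARCH_CAPABILITIES_RDCL_NO);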

--- a/tools/libxl/libxl_cpuid.c
+++ b/tools/libxl/libxl_cpuid.c
@@ -204,6 +204,7 @@ int libxl_cpuid_parse_config(libxl_cpuid
         {"avx512-4fmaps",0x00000007,  0, CPUID_REG_EDX,  3,  1},
         {"ibrsb",        0x00000007,  0, CPUID_REG_EDX, 26,  1},
         {"stibp",        0x00000007,  0, CPUID_REG_EDX, 27,  1},
+        {"arch-caps",    0x00000007,  0, CPUID_REG_EDX, 29,  1},
 
         {"lahfsahf",     0x80000001, NA, CPUID_REG_ECX,  0,  1},
         {"cmplegacy",    0x80000001, NA, CPUID_REG_ECX,  1,  1},
--- a/tools/misc/xen-cpuid.c
+++ b/tools/misc/xen-cpuid.c
@@ -143,6 +143,7 @@ static const char *str_7d0[32] =
     [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
 
     [26] = "ibrsb",         [27] = "stibp",
+    /* 28 */                [29] = "arch_caps",
 };
 
 static struct {
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1545,7 +1545,16 @@ void __init noreturn __start_xen(unsigne
     cr4_pv32_mask = mmu_cr4_features & XEN_CR4_PV32_BITS;
 
     if ( opt_xpti < 0 )
-        opt_xpti = boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
+    {
+        uint64_t caps = 0;
+
+        if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+            caps = ARCH_CAPABILITIES_RDCL_NO;
+        else if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
+            rdmsrl(MSR_ARCH_CAPABILITIES, caps);
+
+        opt_xpti = !(caps & ARCH_CAPABILITIES_RDCL_NO);
+    }
     if ( opt_xpti )
         setup_clear_cpu_cap(X86_FEATURE_NO_XPTI);
     else
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -40,6 +40,8 @@
 #define PRED_CMD_IBPB			(_AC(1, ULL) << 0)
 
 #define MSR_ARCH_CAPABILITIES		0x0000010a
+#define ARCH_CAPABILITIES_RDCL_NO	(_AC(1, ULL) << 0)
+#define ARCH_CAPABILITIES_IBRS_ALL	(_AC(1, ULL) << 1)
 
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_PERFCTR0		0x000000c1
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -244,6 +244,7 @@ XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /
 XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A  AVX512 Multiply Accumulation Single Precision */
 XEN_CPUFEATURE(IBRSB,         9*32+26) /*A  IBRS and IBPB support (used by Intel) */
 XEN_CPUFEATURE(STIBP,         9*32+27) /*A! STIBP */
+XEN_CPUFEATURE(ARCH_CAPS,     9*32+29) /*   IA32_ARCH_CAPABILITIES MSR */
 
 #endif /* XEN_CPUFEATURE */
 





* [PATCH v3 3/6] x86: log XPTI enabled status
From: Jan Beulich @ 2018-03-13 13:48 UTC (permalink / raw)
  To: xen-devel; +Cc: Juergen Gross, Andrew Cooper

At the same time also report the state of the two defined
ARCH_CAPABILITIES MSR bits. To avoid further complicating the
conditional around that printk(), drop the conditional (the printk is a
debug level one only anyway).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
---
v2: Re-base over split off earlier patch. Drop MSR_ from
    MSR_ARCH_CAPABILITIES_*. Drop conditional around debug printk().
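
For illustration, on a Meltdown-immune CPU the boot log would then
contain lines of roughly the following shape (hypothetical output - the
exact feature list depends on the hardware and command line options):

    (XEN) Speculative mitigation facilities:
    (XEN)   Hardware features: IBRS/IBPB STIBP IBRS_ALL RDCL_NO
    (XEN) XPTI: disabled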

--- a/xen/arch/x86/spec_ctrl.c
+++ b/xen/arch/x86/spec_ctrl.c
@@ -21,7 +21,7 @@
 #include <xen/lib.h>
 
 #include <asm/microcode.h>
-#include <asm/msr-index.h>
+#include <asm/msr.h>
 #include <asm/processor.h>
 #include <asm/spec_ctrl.h>
 #include <asm/spec_ctrl_asm.h>
@@ -100,23 +100,25 @@ custom_param("bti", parse_bti);
 static void __init print_details(enum ind_thunk thunk)
 {
     unsigned int _7d0 = 0, e8b = 0, tmp;
+    uint64_t caps = 0;
 
     /* Collect diagnostics about available mitigations. */
     if ( boot_cpu_data.cpuid_level >= 7 )
         cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0);
     if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 )
         cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp);
+    if ( _7d0 & cpufeat_mask(X86_FEATURE_ARCH_CAPS) )
+        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
 
     printk(XENLOG_DEBUG "Speculative mitigation facilities:\n");
 
     /* Hardware features which pertain to speculative mitigations. */
-    if ( (_7d0 & (cpufeat_mask(X86_FEATURE_IBRSB) |
-                  cpufeat_mask(X86_FEATURE_STIBP))) ||
-         (e8b & cpufeat_mask(X86_FEATURE_IBPB)) )
-        printk(XENLOG_DEBUG "  Hardware features:%s%s%s\n",
-               (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
-               (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
-               (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "");
+    printk(XENLOG_DEBUG "  Hardware features:%s%s%s%s%s\n",
+           (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
+           (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
+           (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "",
+           (caps & ARCH_CAPABILITIES_IBRS_ALL)      ? " IBRS_ALL"  : "",
+           (caps & ARCH_CAPABILITIES_RDCL_NO)       ? " RDCL_NO"   : "");
 
     /* Compiled-in support which pertains to BTI mitigations. */
     if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
@@ -133,6 +135,9 @@ static void __init print_details(enum in
            opt_ibpb                                  ? " IBPB"       : "",
            boot_cpu_has(X86_FEATURE_RSB_NATIVE)      ? " RSB_NATIVE" : "",
            boot_cpu_has(X86_FEATURE_RSB_VMEXIT)      ? " RSB_VMEXIT" : "");
+
+    printk(XENLOG_INFO "XPTI: %s\n",
+           boot_cpu_has(X86_FEATURE_NO_XPTI) ? "disabled" : "enabled");
 }
 
 /* Calculate whether Retpoline is known-safe on this CPU. */





* [PATCH v3 4/6] x86/XPTI: use %r12 to write zero into xen_cr3
From: Jan Beulich @ 2018-03-13 13:49 UTC (permalink / raw)
  To: xen-devel; +Cc: Juergen Gross, Andrew Cooper

Now that we zero all registers early on all entry paths, use that to
avoid a couple of immediates here.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
We may want to consider eliminating a few more $0 this way. But
especially for byte ones I'm not sure it's worth it, due to the REX
prefix the use of %r12 would incur.
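
To illustrate the tradeoff (instruction encodings worked out by hand,
so treat the exact byte sequences as a sketch):

        movq  $0, 8(%rbx)     /* 48 C7 43 08 00 00 00 00 - 8 bytes */
        mov   %r12, 8(%rbx)   /* 4C 89 63 08             - 4 bytes */

        movb  $0, 8(%rbx)     /* C6 43 08 00             - 4 bytes */
        mov   %r12b, 8(%rbx)  /* 44 88 63 08             - 4 bytes */

For 64-bit stores the register form saves the 4-byte immediate, while
for byte-sized stores the REX prefix needed to reach %r12b eats the
entire saving.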

--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -216,7 +216,7 @@ ENTRY(cstar_enter)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
         mov   %rcx, %cr3
-        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
+        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lcstar_cr3_okay:
         sti
 .Lcstar_cr3_end:
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -185,7 +185,7 @@ ENTRY(lstar_enter)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
         mov   %rcx, %cr3
-        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
+        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Llstar_cr3_okay:
         sti
 .Llstar_cr3_end:
@@ -295,7 +295,7 @@ GLOBAL(sysenter_eflags_saved)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
         mov   %rcx, %cr3
-        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
+        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lsyse_cr3_okay:
         sti
 .Lsyse_cr3_end:
@@ -348,7 +348,7 @@ ENTRY(int80_direct_trap)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
         mov   %rcx, %cr3
-        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
+        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lint80_cr3_okay:
         sti
 .Lint80_cr3_end:
@@ -562,10 +562,9 @@ ENTRY(common_interrupt)
         neg   %rcx
 .Lintr_cr3_load:
         mov   %rcx, %cr3
-        xor   %ecx, %ecx
-        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
-        cmovnz %rcx, %r15
+        cmovnz %r12, %r15
 .Lintr_cr3_okay:
 
         CR4_PV32_RESTORE
@@ -610,10 +609,9 @@ GLOBAL(handle_exception)
         neg   %rcx
 .Lxcpt_cr3_load:
         mov   %rcx, %cr3
-        xor   %ecx, %ecx
-        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
-        cmovnz %rcx, %r15
+        cmovnz %r12, %r15
 .Lxcpt_cr3_okay:
 
 handle_exception_saved:
@@ -838,7 +836,7 @@ handle_ist_exception:
         neg   %rcx
 .List_cr3_load:
         mov   %rcx, %cr3
-        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
 .List_cr3_okay:
 
         CR4_PV32_RESTORE





* [PATCH v3 5/6] x86/XPTI: reduce .text.entry
From: Jan Beulich @ 2018-03-13 13:50 UTC (permalink / raw)
  To: xen-devel; +Cc: Juergen Gross, Andrew Cooper

This exposes fewer pieces of code and at the same time reduces the
range covered from slightly above 3 pages to a little below 2 of them.

The code being moved is unchanged, except for the removal of trailing
blanks, the insertion of blanks between operands, and the dropping of a
pointless q suffix from "retq".

A few more small pieces could be moved, but it seems better to me to
leave them where they are, so as not to make it overly hard to follow
code paths.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
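
The underlying pattern is plain section juggling - only what sits in
.text.entry remains mapped while on the restricted per-CPU (XPTI) page
tables (a sketch of the idea, not code from the patch):

        .section .text.entry, "ax", @progbits
        /* Entry stubs, CR3 switching, and exit paths: must be
         * reachable while still on the restricted page tables. */

        .text
        /* Helpers such as create_bounce_frame and the event-processing
         * loops: only run once back on the full Xen page tables. */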

--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -13,8 +13,6 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
-        .section .text.entry, "ax", @progbits
-
 ENTRY(entry_int82)
         ASM_CLAC
         pushq $0
@@ -192,6 +190,8 @@ ENTRY(compat_post_handle_exception)
         movb  $0,TRAPBOUNCE_flags(%rdx)
         jmp   compat_test_all_events
 
+        .section .text.entry, "ax", @progbits
+
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
         ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
@@ -249,6 +249,8 @@ UNLIKELY_END(compat_syscall_gpf)
         movb  %cl,TRAPBOUNCE_flags(%rdx)
         jmp   .Lcompat_bounce_exception
 
+        .text
+
 ENTRY(compat_sysenter)
         CR4_PV32_RESTORE
         movq  VCPU_trap_ctxt(%rbx),%rcx
@@ -268,9 +270,6 @@ ENTRY(compat_int80_direct_trap)
         call  compat_create_bounce_frame
         jmp   compat_test_all_events
 
-        /* compat_create_bounce_frame & helpers don't need to be in .text.entry */
-        .text
-
 /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:            */
 /*   {[ERRCODE,] EIP, CS, EFLAGS, [ESP, SS]}                             */
 /* %rdx: trap_bounce, %rbx: struct vcpu                                  */
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -14,8 +14,6 @@
 #include <public/xen.h>
 #include <irq_vectors.h>
 
-        .section .text.entry, "ax", @progbits
-
 /* %rbx: struct vcpu */
 ENTRY(switch_to_kernel)
         leaq  VCPU_trap_bounce(%rbx),%rdx
@@ -34,8 +32,107 @@ ENTRY(switch_to_kernel)
         movb  %cl,TRAPBOUNCE_flags(%rdx)
         call  create_bounce_frame
         andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
+/* %rbx: struct vcpu */
+test_all_events:
+        ASSERT_NOT_IN_ATOMIC
+        cli                             # tests must not race interrupts
+/*test_softirqs:*/
+        movl  VCPU_processor(%rbx), %eax
+        shll  $IRQSTAT_shift, %eax
+        leaq  irq_stat+IRQSTAT_softirq_pending(%rip), %rcx
+        cmpl  $0, (%rcx, %rax, 1)
+        jne   process_softirqs
+        cmpb  $0, VCPU_mce_pending(%rbx)
+        jne   process_mce
+.Ltest_guest_nmi:
+        cmpb  $0, VCPU_nmi_pending(%rbx)
+        jne   process_nmi
+test_guest_events:
+        movq  VCPU_vcpu_info(%rbx), %rax
+        movzwl VCPUINFO_upcall_pending(%rax), %eax
+        decl  %eax
+        cmpl  $0xfe, %eax
+        ja    restore_all_guest
+/*process_guest_events:*/
+        sti
+        leaq  VCPU_trap_bounce(%rbx), %rdx
+        movq  VCPU_event_addr(%rbx), %rax
+        movq  %rax, TRAPBOUNCE_eip(%rdx)
+        movb  $TBF_INTERRUPT, TRAPBOUNCE_flags(%rdx)
+        call  create_bounce_frame
         jmp   test_all_events
 
+        ALIGN
+/* %rbx: struct vcpu */
+process_softirqs:
+        sti
+        call do_softirq
+        jmp  test_all_events
+
+        ALIGN
+/* %rbx: struct vcpu */
+process_mce:
+        testb $1 << VCPU_TRAP_MCE, VCPU_async_exception_mask(%rbx)
+        jnz  .Ltest_guest_nmi
+        sti
+        movb $0, VCPU_mce_pending(%rbx)
+        call set_guest_machinecheck_trapbounce
+        test %eax, %eax
+        jz   test_all_events
+        movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
+        movb %dl, VCPU_mce_old_mask(%rbx)            # iret hypercall
+        orl  $1 << VCPU_TRAP_MCE, %edx
+        movb %dl, VCPU_async_exception_mask(%rbx)
+        jmp  process_trap
+
+        ALIGN
+/* %rbx: struct vcpu */
+process_nmi:
+        testb $1 << VCPU_TRAP_NMI, VCPU_async_exception_mask(%rbx)
+        jnz  test_guest_events
+        sti
+        movb $0, VCPU_nmi_pending(%rbx)
+        call set_guest_nmi_trapbounce
+        test %eax, %eax
+        jz   test_all_events
+        movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
+        movb %dl, VCPU_nmi_old_mask(%rbx)            # iret hypercall
+        orl  $1 << VCPU_TRAP_NMI, %edx
+        movb %dl, VCPU_async_exception_mask(%rbx)
+        /* FALLTHROUGH */
+process_trap:
+        leaq VCPU_trap_bounce(%rbx), %rdx
+        call create_bounce_frame
+        jmp  test_all_events
+
+/* No special register assumptions. */
+ENTRY(ret_from_intr)
+        GET_CURRENT(bx)
+        testb $3, UREGS_cs(%rsp)
+        jz    restore_all_xen
+        movq  VCPU_domain(%rbx), %rax
+        cmpb  $0, DOMAIN_is_32bit_pv(%rax)
+        je    test_all_events
+        jmp   compat_test_all_events
+
+/* Enable NMIs.  No special register assumptions. Only %rax is not preserved. */
+ENTRY(enable_nmis)
+        movq  %rsp, %rax /* Grab RSP before pushing */
+
+        /* Set up stack frame */
+        pushq $0               /* SS */
+        pushq %rax             /* RSP */
+        pushfq                 /* RFLAGS */
+        pushq $__HYPERVISOR_CS /* CS */
+        leaq  1f(%rip), %rax
+        pushq %rax             /* RIP */
+
+        iretq /* Disable the hardware NMI latch */
+1:
+        ret
+
+        .section .text.entry, "ax", @progbits
+
 /* %rbx: struct vcpu, interrupts disabled */
 restore_all_guest:
         ASSERT_INTERRUPTS_DISABLED
@@ -197,80 +294,8 @@ ENTRY(lstar_enter)
 
         mov   %rsp, %rdi
         call  pv_hypercall
-
-/* %rbx: struct vcpu */
-test_all_events:
-        ASSERT_NOT_IN_ATOMIC
-        cli                             # tests must not race interrupts
-/*test_softirqs:*/  
-        movl  VCPU_processor(%rbx),%eax
-        shll  $IRQSTAT_shift,%eax
-        leaq  irq_stat+IRQSTAT_softirq_pending(%rip),%rcx
-        cmpl  $0,(%rcx,%rax,1)
-        jne   process_softirqs
-        cmpb  $0, VCPU_mce_pending(%rbx)
-        jne   process_mce
-.Ltest_guest_nmi:
-        cmpb  $0, VCPU_nmi_pending(%rbx)
-        jne   process_nmi
-test_guest_events:
-        movq  VCPU_vcpu_info(%rbx),%rax
-        movzwl VCPUINFO_upcall_pending(%rax),%eax
-        decl  %eax
-        cmpl  $0xfe,%eax
-        ja    restore_all_guest
-/*process_guest_events:*/
-        sti
-        leaq  VCPU_trap_bounce(%rbx),%rdx
-        movq  VCPU_event_addr(%rbx),%rax
-        movq  %rax,TRAPBOUNCE_eip(%rdx)
-        movb  $TBF_INTERRUPT,TRAPBOUNCE_flags(%rdx)
-        call  create_bounce_frame
         jmp   test_all_events
 
-        ALIGN
-/* %rbx: struct vcpu */
-process_softirqs:
-        sti       
-        call do_softirq
-        jmp  test_all_events
-
-        ALIGN
-/* %rbx: struct vcpu */
-process_mce:
-        testb $1 << VCPU_TRAP_MCE,VCPU_async_exception_mask(%rbx)
-        jnz  .Ltest_guest_nmi
-        sti
-        movb $0,VCPU_mce_pending(%rbx)
-        call set_guest_machinecheck_trapbounce
-        test %eax,%eax
-        jz   test_all_events
-        movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the
-        movb %dl,VCPU_mce_old_mask(%rbx)            # iret hypercall
-        orl  $1 << VCPU_TRAP_MCE,%edx
-        movb %dl,VCPU_async_exception_mask(%rbx)
-        jmp  process_trap
-
-        ALIGN
-/* %rbx: struct vcpu */
-process_nmi:
-        testb $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%rbx)
-        jnz  test_guest_events
-        sti
-        movb $0,VCPU_nmi_pending(%rbx)
-        call set_guest_nmi_trapbounce
-        test %eax,%eax
-        jz   test_all_events
-        movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the
-        movb %dl,VCPU_nmi_old_mask(%rbx)            # iret hypercall
-        orl  $1 << VCPU_TRAP_NMI,%edx
-        movb %dl,VCPU_async_exception_mask(%rbx)
-        /* FALLTHROUGH */
-process_trap:
-        leaq VCPU_trap_bounce(%rbx),%rdx
-        call create_bounce_frame
-        jmp  test_all_events
-
 ENTRY(sysenter_entry)
         ALTERNATIVE nop, sti, X86_FEATURE_NO_XPTI
         pushq $FLAT_USER_SS
@@ -578,16 +603,6 @@ ENTRY(common_interrupt)
         ALTERNATIVE_NOP .Lintr_cr3_restore, .Lintr_cr3_end, X86_FEATURE_NO_XPTI
         ALTERNATIVE_NOP .Lintr_cr3_start, .Lintr_cr3_okay, X86_FEATURE_NO_XPTI
 
-/* No special register assumptions. */
-ENTRY(ret_from_intr)
-        GET_CURRENT(bx)
-        testb $3,UREGS_cs(%rsp)
-        jz    restore_all_xen
-        movq  VCPU_domain(%rbx),%rax
-        cmpb  $0, DOMAIN_is_32bit_pv(%rax)
-        je    test_all_events
-        jmp   compat_test_all_events
-
 ENTRY(page_fault)
         movl  $TRAP_page_fault,4(%rsp)
 /* No special register assumptions. */
@@ -888,22 +903,6 @@ ENTRY(machine_check)
         movl  $TRAP_machine_check,4(%rsp)
         jmp   handle_ist_exception
 
-/* Enable NMIs.  No special register assumptions. Only %rax is not preserved. */
-ENTRY(enable_nmis)
-        movq  %rsp, %rax /* Grab RSP before pushing */
-
-        /* Set up stack frame */
-        pushq $0               /* SS */
-        pushq %rax             /* RSP */
-        pushfq                 /* RFLAGS */
-        pushq $__HYPERVISOR_CS /* CS */
-        leaq  1f(%rip),%rax
-        pushq %rax             /* RIP */
-
-        iretq /* Disable the hardware NMI latch */
-1:
-        retq
-
 /* No op trap handler.  Required for kexec crash path. */
 GLOBAL(trap_nop)
         iretq




* [PATCH v3 6/6] x86: avoid double CR3 reload when switching to guest user mode
From: Jan Beulich @ 2018-03-13 13:50 UTC (permalink / raw)
  To: xen-devel; +Cc: Juergen Gross, Andrew Cooper

When XPTI is active, the CR3 load in restore_all_guest is sufficient
when switching to user mode, improving in particular system call and
page fault exit paths for the guest.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
---
v2: Add ASSERT(!in_irq()).
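
The core of the change is the early-out in _toggle_guest_pt(), restated
here for clarity (see the diff below for the full context):

    if ( !force_cr3 && !(v->arch.flags & TF_kernel_mode) )
        return; /* Switching to user mode with XPTI active: the exit
                 * path (restore_all_guest) reloads CR3 anyway. */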

--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -220,10 +220,22 @@ int pv_domain_initialise(struct domain *
     return rc;
 }
 
-static void _toggle_guest_pt(struct vcpu *v)
+static void _toggle_guest_pt(struct vcpu *v, bool force_cr3)
 {
+    ASSERT(!in_irq());
+
     v->arch.flags ^= TF_kernel_mode;
     update_cr3(v);
+
+    /*
+     * There's no need to load CR3 here when it is going to be loaded on the
+     * way out to guest mode again anyway, and when the page tables we're
+     * currently on are the kernel ones (whereas when switching to kernel
+     * mode we need to be able to write a bounce frame onto the kernel stack).
+     */
+    if ( !force_cr3 && !(v->arch.flags & TF_kernel_mode) )
+        return;
+
     /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
     asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );
 
@@ -253,13 +265,13 @@ void toggle_guest_mode(struct vcpu *v)
     }
     asm volatile ( "swapgs" );
 
-    _toggle_guest_pt(v);
+    _toggle_guest_pt(v, cpu_has_no_xpti);
 }
 
 void toggle_guest_pt(struct vcpu *v)
 {
     if ( !is_pv_32bit_vcpu(v) )
-        _toggle_guest_pt(v);
+        _toggle_guest_pt(v, true);
 }
 
 /*





* Re: [PATCH v3 2/6] x86: disable XPTI when RDCL_NO
From: Andrew Cooper @ 2018-03-15 15:54 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Juergen Gross

On 13/03/18 13:48, Jan Beulich wrote:
> Use the respective ARCH_CAPABILITIES MSR bit (RDCL_NO, i.e. the CPU is
> not vulnerable to Meltdown), but don't expose the MSR to guests yet.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Tested-by: Juergen Gross <jgross@suse.com>
> Reviewed-by: Juergen Gross <jgross@suse.com>

Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH v3 3/6] x86: log XPTI enabled status
From: Andrew Cooper @ 2018-03-15 15:56 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Juergen Gross

On 13/03/18 13:48, Jan Beulich wrote:
> At the same time also report the state of the two defined
> ARCH_CAPABILITIES MSR bits. To avoid further complicating the
> conditional around that printk(), drop the conditional (the printk is
> a debug level one only anyway).
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Tested-by: Juergen Gross <jgross@suse.com>
> Reviewed-by: Juergen Gross <jgross@suse.com>

In principle, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

However, don't we want to take the opportunity to raise the XENLOG_INFO
to something which will be printed by default in a release build, seeing
as that plan fell through the first time?


* Re: [PATCH v3 4/6] x86/XPTI: use %r12 to write zero into xen_cr3
From: Andrew Cooper @ 2018-03-15 16:02 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Juergen Gross

On 13/03/18 13:49, Jan Beulich wrote:
> Now that we zero all registers early on all entry paths, use that to
> avoid a couple of immediates here.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> We may want to consider eliminating a few more $0 this way. But
> especially for byte ones I'm not sure it's worth it, due to the REX
> prefix the use of %r12 would incur.
>
> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -216,7 +216,7 @@ ENTRY(cstar_enter)
>          mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
>          neg   %rcx
>          mov   %rcx, %cr3
> -        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
> +        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)

It is unreasonable to expect people to realise that this use of %r12 is
for a zero, because there is no write to %r12 visible.  These need some
kind of comment.  Perhaps:

diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index f52bffc..14c87a0 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -215,7 +215,7 @@ ENTRY(cstar_enter)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
         mov   %rcx, %cr3
-        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
+        movq  %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx) /* Arbitrary zero reg. */
 .Lcstar_cr3_okay:
         sti

~Andrew


* Re: [PATCH v3 3/6] x86: log XPTI enabled status
From: Jan Beulich @ 2018-03-15 16:35 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Juergen Gross, xen-devel

>>> On 15.03.18 at 16:56, <andrew.cooper3@citrix.com> wrote:
> On 13/03/18 13:48, Jan Beulich wrote:
>> At the same time also report the state of the two defined
>> ARCH_CAPABILITIES MSR bits. To avoid further complicating the
>> conditional around that printk(), drop the conditional (the printk
>> is a debug level one only anyway).
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> Tested-by: Juergen Gross <jgross@suse.com>
>> Reviewed-by: Juergen Gross <jgross@suse.com>
> 
> In principle, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> 
> However, don't we want to take the opportunity to raise the XENLOG_INFO
> to something which will be printed by default in a release build, seeing
> as that plan fell through the first time?

Well, strictly speaking I don't think this patch should alter the
BTI related printk(), and I do think the one added here should
be in sync with the other one. But I wouldn't mind making both
XENLOG_WARNING (or perhaps better omit XENLOG_* altogether
from these two). I could easily alter this before committing, but
none of these can be committed afaict without us reaching
agreement on patch 1 (the dependencies between the patches
are also the reason why I didn't re-order what is still patch 1,
contrary to what I had first intended to do for v3).

Jan



* Re: [PATCH v3 4/6] x86/XPTI: use %r12 to write zero into xen_cr3
From: Jan Beulich @ 2018-03-15 16:39 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Juergen Gross, xen-devel

>>> On 15.03.18 at 17:02, <andrew.cooper3@citrix.com> wrote:
> On 13/03/18 13:49, Jan Beulich wrote:
>> Now that we zero all registers early on all entry paths, use that to
>> avoid a couple of immediates here.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> ---
>> We may want to consider eliminating a few more $0 this way. But
>> especially for byte ones I'm not sure it's worth it, due to the REX
>> prefix the use of %r12 would incur.
>>
>> --- a/xen/arch/x86/x86_64/compat/entry.S
>> +++ b/xen/arch/x86/x86_64/compat/entry.S
>> @@ -216,7 +216,7 @@ ENTRY(cstar_enter)
>>          mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
>>          neg   %rcx
>>          mov   %rcx, %cr3
>> -        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
>> +        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
> 
> It is unreasonable to expect people to realise that this use of %r12 is
> for a zero, because there is no write to %r12 visible.  These need some
> kind of comment.

Well, okay, I'll add the same comment in all 7 places.

Jan



* Re: [PATCH v3 4/6] x86/XPTI: use %r12 to write zero into xen_cr3
From: Andrew Cooper @ 2018-03-15 16:41 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Juergen Gross, xen-devel

On 15/03/18 16:39, Jan Beulich wrote:
>>>> On 15.03.18 at 17:02, <andrew.cooper3@citrix.com> wrote:
>> On 13/03/18 13:49, Jan Beulich wrote:
>>> Now that we zero all registers early on all entry paths, use that to
>>> avoid a couple of immediates here.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> ---
>>> We may want to consider eliminating a few more $0 this way. But
>>> especially for byte ones I'm not sure it's worth it, due to the REX
>>> prefix the use of %r12 would incur.
>>>
>>> --- a/xen/arch/x86/x86_64/compat/entry.S
>>> +++ b/xen/arch/x86/x86_64/compat/entry.S
>>> @@ -216,7 +216,7 @@ ENTRY(cstar_enter)
>>>          mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
>>>          neg   %rcx
>>>          mov   %rcx, %cr3
>>> -        movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
>>> +        mov   %r12, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
>> It is unreasonable to expect people to realise that this use of %r12 is
>> for a zero, because there is no write to %r12 visible.  These need some
>> kind of comment.
> Well, okay, I'll add the same comment in all 7 places.

With that change, Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>


* Re: [PATCH v3 5/6] x86/XPTI: reduce .text.entry
From: Andrew Cooper @ 2018-03-15 17:10 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Juergen Gross

On 13/03/18 13:50, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -14,8 +14,6 @@
>  #include <public/xen.h>
>  #include <irq_vectors.h>
>  
> -        .section .text.entry, "ax", @progbits
> -
>  /* %rbx: struct vcpu */
>  ENTRY(switch_to_kernel)
>          leaq  VCPU_trap_bounce(%rbx),%rdx
> @@ -34,8 +32,107 @@ ENTRY(switch_to_kernel)
>          movb  %cl,TRAPBOUNCE_flags(%rdx)
>          call  create_bounce_frame
>          andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)

Newline here please, as test_all_events is logically a separate thing. 
It might be worth using an ALIGN, given how many jmps land here.

Otherwise, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

> +/* %rbx: struct vcpu */
> +test_all_events:
> +        ASSERT_NOT_IN_ATOMIC
> +        cli                             # tests must not race interrupts
> +/*test_softirqs:*/
> +        movl  VCPU_processor(%rbx), %eax
> +        shll  $IRQSTAT_shift, %eax
> +        leaq  irq_stat+IRQSTAT_softirq_pending(%rip), %rcx
> +        cmpl  $0, (%rcx, %rax, 1)
> +        jne   process_softirqs
> +        cmpb  $0, VCPU_mce_pending(%rbx)
> +        jne   process_mce
> +.Ltest_guest_nmi:
> +        cmpb  $0, VCPU_nmi_pending(%rbx)
> +        jne   process_nmi
> +test_guest_events:
> +        movq  VCPU_vcpu_info(%rbx), %rax
> +        movzwl VCPUINFO_upcall_pending(%rax), %eax
> +        decl  %eax
> +        cmpl  $0xfe, %eax
> +        ja    restore_all_guest
> +/*process_guest_events:*/
> +        sti
> +        leaq  VCPU_trap_bounce(%rbx), %rdx
> +        movq  VCPU_event_addr(%rbx), %rax
> +        movq  %rax, TRAPBOUNCE_eip(%rdx)
> +        movb  $TBF_INTERRUPT, TRAPBOUNCE_flags(%rdx)
> +        call  create_bounce_frame
>          jmp   test_all_events
>  
> +        ALIGN
> +/* %rbx: struct vcpu */
> +process_softirqs:
> +        sti
> +        call do_softirq
> +        jmp  test_all_events
> +
> +        ALIGN
> +/* %rbx: struct vcpu */
> +process_mce:
> +        testb $1 << VCPU_TRAP_MCE, VCPU_async_exception_mask(%rbx)
> +        jnz  .Ltest_guest_nmi
> +        sti
> +        movb $0, VCPU_mce_pending(%rbx)
> +        call set_guest_machinecheck_trapbounce
> +        test %eax, %eax
> +        jz   test_all_events
> +        movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
> +        movb %dl, VCPU_mce_old_mask(%rbx)            # iret hypercall
> +        orl  $1 << VCPU_TRAP_MCE, %edx
> +        movb %dl, VCPU_async_exception_mask(%rbx)
> +        jmp  process_trap
> +
> +        ALIGN
> +/* %rbx: struct vcpu */
> +process_nmi:
> +        testb $1 << VCPU_TRAP_NMI, VCPU_async_exception_mask(%rbx)
> +        jnz  test_guest_events
> +        sti
> +        movb $0, VCPU_nmi_pending(%rbx)
> +        call set_guest_nmi_trapbounce
> +        test %eax, %eax
> +        jz   test_all_events
> +        movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
> +        movb %dl, VCPU_nmi_old_mask(%rbx)            # iret hypercall
> +        orl  $1 << VCPU_TRAP_NMI, %edx
> +        movb %dl, VCPU_async_exception_mask(%rbx)
> +        /* FALLTHROUGH */
> +process_trap:
> +        leaq VCPU_trap_bounce(%rbx), %rdx
> +        call create_bounce_frame
> +        jmp  test_all_events
> +
> +/* No special register assumptions. */
> +ENTRY(ret_from_intr)
> +        GET_CURRENT(bx)
> +        testb $3, UREGS_cs(%rsp)
> +        jz    restore_all_xen
> +        movq  VCPU_domain(%rbx), %rax
> +        cmpb  $0, DOMAIN_is_32bit_pv(%rax)
> +        je    test_all_events
> +        jmp   compat_test_all_events
> +
> +/* Enable NMIs.  No special register assumptions. Only %rax is not preserved. */
> +ENTRY(enable_nmis)
> +        movq  %rsp, %rax /* Grab RSP before pushing */
> +
> +        /* Set up stack frame */
> +        pushq $0               /* SS */
> +        pushq %rax             /* RSP */
> +        pushfq                 /* RFLAGS */
> +        pushq $__HYPERVISOR_CS /* CS */
> +        leaq  1f(%rip), %rax
> +        pushq %rax             /* RIP */
> +
> +        iretq /* Disable the hardware NMI latch */
> +1:
> +        ret
> +
> +        .section .text.entry, "ax", @progbits
> +
>  /* %rbx: struct vcpu, interrupts disabled */
>  restore_all_guest:
>          ASSERT_INTERRUPTS_DISABLED
>



* Re: [PATCH v3 1/6] x86: NOP out XPTI entry/exit code when it's not in use
From: Andrew Cooper @ 2018-03-15 19:44 UTC (permalink / raw)
  To: Jan Beulich, xen-devel; +Cc: Juergen Gross

On 13/03/18 13:47, Jan Beulich wrote:
> Introduce a synthetic feature flag to use alternative instruction
> patching to NOP out all XPTI code on entry/exit paths. Having NOPs here is
> generally better than using conditional branches.
>
> Also change the limit on the number of bytes we can patch in one go to
> that resulting from the encoding in struct alt_instr - there's no point
> reducing it below that limit, and without a check being in place that
> the limit isn't actually exceeded, such an artificial boundary is a
> latent risk.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Tested-by: Juergen Gross <jgross@suse.com>
> Reviewed-by: Juergen Gross <jgross@suse.com>

I'm afraid that I still have misgivings about this patch.

While I'm quite willing to trust that it functions correctly, it is
taking some code which is almost impossible to follow already, and
making it substantially more complicated to follow, for what appears to
be a fractional gain.

The two distinct areas of concern are the split interrupt re-enablement
(which really doesn't buy us anything useful), and how obvious the
nopping is (where in the .Lxcpt_cr3_start case, the ALTERNATIVE_NOP is
111 lines (!) away from the code it applies to).

I.e. I'm struggling to decide whether it falls into the category of
unnecessary micro-optimisation or not.

Therefore, I'd like to consider what other XPTI changes we're expecting
to get, and whether those have an impact.

I've got a patch (which I've not had time to submit upstream yet, but
would like to get in for 4.11) which implements a crude "no XPTI for
dom0" mode.  Performance testing shows that in scenarios running only
HVM guests (a very common XenServer setup), the difference between dom0
with and without XPTI can be up to 40% in terms of aggregate
network/disk throughput.

OTOH, I expect my patch will likely change based on Juergen's series
(which I should probably stop using as an excuse to defer).  What other
changes are we expecting?

~Andrew


* Re: [PATCH v3 1/6] x86: NOP out XPTI entry/exit code when it's not in use
From: Juergen Gross @ 2018-03-16  5:45 UTC (permalink / raw)
  To: Andrew Cooper, Jan Beulich, xen-devel

On 15/03/18 20:44, Andrew Cooper wrote:
> On 13/03/18 13:47, Jan Beulich wrote:
>> Introduce a synthetic feature flag to use alternative instruction
>> patching to NOP out all XPTI code on entry/exit paths. Having NOPs here is
>> generally better than using conditional branches.
>>
>> Also change the limit on the number of bytes we can patch in one go to
>> that resulting from the encoding in struct alt_instr - there's no point
>> reducing it below that limit, and without a check being in place that
>> the limit isn't actually exceeded, such an artificial boundary is a
>> latent risk.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> Tested-by: Juergen Gross <jgross@suse.com>
>> Reviewed-by: Juergen Gross <jgross@suse.com>
> 
> I'm afraid that I still have misgivings about this patch.
> 
> While I'm quite willing to trust that it functions correctly, it is
> taking some code which is almost impossible to follow already, and
> making it substantially more complicated to follow, for what appears to
> be a fractional gain.
> 
> The two distinct areas of concern are the split interrupt re-enablement
> (which really doesn't buy us anything useful), and how obvious the
> nopping is (where in the .Lxcpt_cr3_start case, the ALTERNATIVE_NOP is
> 111 lines (!) away from the code it applies to).
> 
> I.e. I'm struggling to decide whether it falls into the category of
> unnecessary micro-optimisation or not.
> 
> Therefore, I'd like to consider what other XPTI changes we're expecting
> to get, and whether those have an impact.
> 
> I've got a patch (which I've not had time to submit upstream yet, but
> would like to get in for 4.11) which implements a crude "no XPTI for
> dom0" mode.  Performance testing shows that in scenarios running only
> HVM guests (a very common XenServer setup), the difference between
> running dom0 with and without XPTI can be up to 40% in aggregate
> network/disk throughput.

That's something my series is covering already.

Right now I'm hunting a bug in my PCID patch (the last of my series)
which seems to be related to the INVPCID adaptations I made to address
the comments on v2 of my series.

I can post my series without the last patch if you want. This gives a
speedup in a simple compilation benchmark of about 20% in system time
and 12% in elapsed time, and it includes the option to run dom0 without
XPTI.


Juergen


* Re: [PATCH v3 5/6] x86/XPTI: reduce .text.entry
  2018-03-15 17:10   ` Andrew Cooper
@ 2018-03-16  7:10     ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2018-03-16  7:10 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Juergen Gross, xen-devel

>>> On 15.03.18 at 18:10, <andrew.cooper3@citrix.com> wrote:
> On 13/03/18 13:50, Jan Beulich wrote:
>> --- a/xen/arch/x86/x86_64/entry.S
>> +++ b/xen/arch/x86/x86_64/entry.S
>> @@ -14,8 +14,6 @@
>>  #include <public/xen.h>
>>  #include <irq_vectors.h>
>>  
>> -        .section .text.entry, "ax", @progbits
>> -
>>  /* %rbx: struct vcpu */
>>  ENTRY(switch_to_kernel)
>>          leaq  VCPU_trap_bounce(%rbx),%rdx
>> @@ -34,8 +32,107 @@ ENTRY(switch_to_kernel)
>>          movb  %cl,TRAPBOUNCE_flags(%rdx)
>>          call  create_bounce_frame
>>          andl  $~X86_EFLAGS_DF,UREGS_eflags(%rsp)
> 
> Newline here please, as test_all_events is logically a separate thing. 

I actually disagree in cases where the label sits in the middle of
straight-line code.

> It might be worth using an ALIGN, given how many jmps land here.

That'll again get us into the discussion of suitable NOPs. We'd have
to extend ALIGN with ALTERNATIVE_NOP for the result to be as desired in
all cases. Generally I'd expect straight-line execution to be (one of)
the most common ways to reach that label.
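
As a sketch of the issue (hypothetical code, not taken from the
series):

        /*
         * Alignment filler is executed on the fall-through path into
         * the label, so for the result to be optimal in all cases the
         * filler itself would need to be suitable (long) NOPs.
         */
        .p2align 4
test_all_events:
        /* ... */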

Furthermore I intentionally didn't want to alter any of the moved
code, other than its spelling/formatting.

> Otherwise, Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>

I'll hold off on recording this until the above is clarified.

Jan



* Re: [PATCH v3 1/6] x86: NOP out XPTI entry/exit code when it's not in use
  2018-03-15 19:44   ` Andrew Cooper
  2018-03-16  5:45     ` Juergen Gross
@ 2018-03-16  9:46     ` Jan Beulich
       [not found]     ` <5AABA09002000078001B29C7@suse.com>
  2 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2018-03-16  9:46 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Juergen Gross, xen-devel

>>> On 15.03.18 at 20:44, <andrew.cooper3@citrix.com> wrote:
> On 13/03/18 13:47, Jan Beulich wrote:
>> Introduce a synthetic feature flag to use alternative instruction
>> patching to NOP out all code on entry/exit paths. Having NOPs here is
>> generally better than using conditional branches.
>>
>> Also change the limit on the number of bytes we can patch in one go to
>> that resulting from the encoding in struct alt_instr - there's no point
>> reducing it below that limit, and without a check being in place that
>> the limit isn't actually exceeded, such an artificial boundary is a
>> latent risk.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> Tested-by: Juergen Gross <jgross@suse.com>
>> Reviewed-by: Juergen Gross <jgross@suse.com>
> 
> I'm afraid that I still have misgivings about this patch.
> 
> While I'm quite willing to trust that it functions correctly, it is
> taking some code which is already almost impossible to follow, and
> making it substantially harder to follow, for what appears to be a
> fractional gain.
> 
> The two distinct areas of concern are the split interrupt re-enablement
> (which really doesn't buy us anything useful),

While I think it is the correct thing to do (restoring as much of the
original behavior as possible), I'm willing to give up on this or split
those changes off into a separate patch. (I now also realize I've coded
this in a more complicated form than necessary - there's no need to use
ALTERNATIVE_NOP in those cases; plain ALTERNATIVE will do.)
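
Roughly (a sketch - the feature flag name is inferred from
cpu_has_no_xpti in the patch, and the exact macro syntax here is from
memory rather than copied from the tree):

        /*
         * Plain ALTERNATIVE with an empty replacement: the patching
         * machinery produces NOPs of the right length by itself.
         */
        ALTERNATIVE "sti", "", X86_FEATURE_NO_XPTI

instead of wrapping a labelled range:

.Lsti_start:
        sti
.Lsti_end:
        ALTERNATIVE_NOP .Lsti_start, .Lsti_end, X86_FEATURE_NO_XPTI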

> and how non-obvious the
> NOPping is (in the .Lxcpt_cr3_start case, the ALTERNATIVE_NOP is
> 111 lines (!) away from the code it applies to).

Well, there's no alternative to this when we want to NOP out all of
the respective code. That's because the patching needs to be done in a
certain sequence in order to be safe.

However, we could decide not to NOP out the (I think) 4 instances of
restoring xen_cr3, on the basis that %r15 is zero with the other code
NOPed out. That would allow moving the ALTERNATIVE_NOP instances right
to where they apply.
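
Those instances are of roughly this shape (a sketch - the exact
spelling of the field access is from memory, not copied from the
tree):

        /*
         * Harmless to leave unpatched: with the rest of the XPTI code
         * NOPed out, %r15 is still zero here, so this merely writes
         * zero into the xen_cr3 field anyway.
         */
        mov   %r15, STACK_CPU_INFO_FIELD(xen_cr3)(%r14)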

Jan



* Re: [PATCH v3 1/6] x86: NOP out XPTI entry/exit code when it's not in use
       [not found]     ` <5AABA09002000078001B29C7@suse.com>
@ 2018-03-16 10:37       ` Juergen Gross
  2018-03-16 10:43         ` Jan Beulich
  0 siblings, 1 reply; 20+ messages in thread
From: Juergen Gross @ 2018-03-16 10:37 UTC (permalink / raw)
  To: Jan Beulich, Andrew Cooper; +Cc: xen-devel

On 16/03/18 10:46, Jan Beulich wrote:
>>>> On 15.03.18 at 20:44, <andrew.cooper3@citrix.com> wrote:
>> On 13/03/18 13:47, Jan Beulich wrote:
>>> Introduce a synthetic feature flag to use alternative instruction
>>> patching to NOP out all code on entry/exit paths. Having NOPs here is
>>> generally better than using conditional branches.
>>>
>>> Also change the limit on the number of bytes we can patch in one go to
>>> that resulting from the encoding in struct alt_instr - there's no point
>>> reducing it below that limit, and without a check being in place that
>>> the limit isn't actually exceeded, such an artificial boundary is a
>>> latent risk.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> Tested-by: Juergen Gross <jgross@suse.com>
>>> Reviewed-by: Juergen Gross <jgross@suse.com>
>>
>> I'm afraid that I still have misgivings about this patch.
>>
>> While I'm quite willing to trust that it functions correctly, it is
>> taking some code which is already almost impossible to follow, and
>> making it substantially harder to follow, for what appears to be a
>> fractional gain.
>>
>> The two distinct areas of concern are the split interrupt re-enablement
>> (which really doesn't buy us anything useful),
> 
> While I think it is the correct thing to do (restoring as much of the
> original behavior as possible), I'm willing to give up on this or split
> those changes off into a separate patch. (I now also realize I've coded
> this in a more complicated form than necessary - there's no need to use
> ALTERNATIVE_NOP in those cases; plain ALTERNATIVE will do.)
> 
>> and how non-obvious the
>> NOPping is (in the .Lxcpt_cr3_start case, the ALTERNATIVE_NOP is
>> 111 lines (!) away from the code it applies to).
> 
> Well, there's no alternative to this when we want to NOP out all of
> the respective code. That's because the patching needs to be done in a
> certain sequence in order to be safe.
> 
> However, we could decide not to NOP out the (I think) 4 instances of
> restoring xen_cr3, on the basis that %r15 is zero with the other code
> NOPed out. That would allow moving the ALTERNATIVE_NOP instances right
> to where they apply.

I think we should do some performance testing to decide whether it
makes sense to take the patch or not. After all, I don't see a reason
to punish AMD processors for Intel's bugs. And I've already found that
some branches in interrupt handling can really make a difference.


Juergen



* Re: [PATCH v3 1/6] x86: NOP out XPTI entry/exit code when it's not in use
  2018-03-16 10:37       ` Juergen Gross
@ 2018-03-16 10:43         ` Jan Beulich
  0 siblings, 0 replies; 20+ messages in thread
From: Jan Beulich @ 2018-03-16 10:43 UTC (permalink / raw)
  To: Juergen Gross; +Cc: Andrew Cooper, xen-devel

>>> On 16.03.18 at 11:37, <jgross@suse.com> wrote:
> I think we should do some performance testing to decide whether it
> makes sense to take the patch or not. After all, I don't see a reason
> to punish AMD processors for Intel's bugs. And I've already found that
> some branches in interrupt handling can really make a difference.

Well, I had done that before first posting, and found a marginal
improvement. But of course I didn't do this with parts of the patch
applied and parts left out, and to be honest I don't think I'm going
to, as the differences would likely disappear in the noise. Hence I
stand by what I said earlier to Andrew - I could accept splitting the
patch (to move out the STI patching), and I could also accept dropping
the patching of the xen_cr3 restores. But I think the big chunks being
patched should at least be taken.
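
For reference, the limit those big chunks are patched against stems
from the one-byte length fields of the per-site record. Sketching the
record roughly as the assembler macros emit it (field layout from
memory, and the feature flag name inferred from cpu_has_no_xpti - not
copied from the tree):

        .section .altinstructions, "a", @progbits
        .long .Lorig - .           /* s32: offset of the original code */
        .long .Lrepl - .           /* s32: offset of the replacement */
        .word X86_FEATURE_NO_XPTI  /* u16: feature bit keyed off */
        .byte .Lorig_end - .Lorig  /* u8: original length - a single
                                    * byte, hence the 255 byte cap */
        .byte .Lrepl_end - .Lrepl  /* u8: replacement length */
        .previous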

Jan



Thread overview: 20+ messages
2018-03-13 13:37 [PATCH v3 0/6] x86: Meltdown band-aid overhead reduction Jan Beulich
2018-03-13 13:47 ` [PATCH v3 1/6] x86: NOP out XPTI entry/exit code when it's not in use Jan Beulich
2018-03-15 19:44   ` Andrew Cooper
2018-03-16  5:45     ` Juergen Gross
2018-03-16  9:46     ` Jan Beulich
     [not found]     ` <5AABA09002000078001B29C7@suse.com>
2018-03-16 10:37       ` Juergen Gross
2018-03-16 10:43         ` Jan Beulich
2018-03-13 13:48 ` [PATCH v3 2/6] x86: disable XPTI when RDCL_NO Jan Beulich
2018-03-15 15:54   ` Andrew Cooper
2018-03-13 13:48 ` [PATCH v3 3/6] x86: log XPTI enabled status Jan Beulich
2018-03-15 15:56   ` Andrew Cooper
2018-03-15 16:35     ` Jan Beulich
2018-03-13 13:49 ` [PATCH v3 4/6] x86/XPTI: use %r12 to write zero into xen_cr3 Jan Beulich
2018-03-15 16:02   ` Andrew Cooper
2018-03-15 16:39     ` Jan Beulich
2018-03-15 16:41       ` Andrew Cooper
2018-03-13 13:50 ` [PATCH v3 5/6] x86/XPTI: reduce .text.entry Jan Beulich
2018-03-15 17:10   ` Andrew Cooper
2018-03-16  7:10     ` Jan Beulich
2018-03-13 13:50 ` [PATCH v3 6/6] x86: avoid double CR3 reload when switching to guest user mode Jan Beulich
