* [PATCH] x86: slightly reduce Meltdown band-aid overhead
@ 2018-01-18 15:39 Jan Beulich
  2018-01-23 17:39 ` George Dunlap
  2018-01-29 18:55 ` Andrew Cooper
  0 siblings, 2 replies; 5+ messages in thread
From: Jan Beulich @ 2018-01-18 15:39 UTC
  To: xen-devel; +Cc: George Dunlap, Andrew Cooper

I'm not sure why I didn't do this right away: By avoiding to make any
of the cloned directmap PTEs global, there's no need to fiddle with
CR4.PGE on any of the entry paths. Only the exit paths need to flush
global mappings.

The reduced flushing, however, implies that we now need to have
interrupts off on all entry paths until after the page table switch, so
that flush IPIs can't arrive with the restricted page tables still
active, but only a non-global flush happening with the CR3 loads. Along
those lines the "sync" IPI after L4 entry updates now needs to become a
real (and global) flush IPI, so that inside Xen we'll also pick up such
changes.

Take the opportunity and also do a GET_CURRENT() -> __GET_CURRENT()
transition the original patch missed.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
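
To illustrate the mechanism, a sketch with hypothetical names rather
than the actual Xen code follows; the real change is the one-line
_PAGE_GLOBAL mask in clone_mapping() below. A MOV to CR3 flushes all
TLB entries except those marked global, so once the cloned directmap
PTEs lack the G bit, the entry-path CR3 write is itself a sufficient
flush; only the exit path, returning to tables that still contain
global mappings, needs a full (global) flush.

    /* Sketch only: hypothetical helper, not Xen's clone_mapping(). */
    #include <stdint.h>

    #define _PAGE_GLOBAL (UINT64_C(1) << 8)  /* architectural G bit */

    /*
     * Copy a PTE into the restricted per-CPU tables with the G bit
     * stripped, so any later CR3 write evicts its TLB entry without
     * toggling CR4.PGE.
     */
    static uint64_t clone_pte_nonglobal(uint64_t pte)
    {
        return pte & ~_PAGE_GLOBAL;
    }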

--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -3804,18 +3804,14 @@ long do_mmu_update(
     {
         /*
          * Force other vCPU-s of the affected guest to pick up L4 entry
-         * changes (if any). Issue a flush IPI with empty operation mask to
-         * facilitate this (including ourselves waiting for the IPI to
-         * actually have arrived). Utilize the fact that FLUSH_VA_VALID is
-         * meaningless without FLUSH_CACHE, but will allow to pass the no-op
-         * check in flush_area_mask().
+         * changes (if any).
          */
         unsigned int cpu = smp_processor_id();
         cpumask_t *mask = per_cpu(scratch_cpumask, cpu);
 
         cpumask_andnot(mask, pt_owner->domain_dirty_cpumask, cpumask_of(cpu));
         if ( !cpumask_empty(mask) )
-            flush_area_mask(mask, ZERO_BLOCK_PTR, FLUSH_VA_VALID);
+            flush_mask(mask, FLUSH_TLB_GLOBAL);
     }
 
     perfc_add(num_page_updates, i);
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -726,6 +726,7 @@ static int clone_mapping(const void *ptr
     }
 
     pl1e += l1_table_offset(linear);
+    flags &= ~_PAGE_GLOBAL;
 
     if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT )
     {
@@ -1009,8 +1010,17 @@ void __init smp_prepare_cpus(unsigned in
     if ( rc )
         panic("Error %d setting up PV root page table\n", rc);
     if ( per_cpu(root_pgt, 0) )
+    {
         get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
 
+        /*
+         * All entry points which may need to switch page tables have to start
+         * with interrupts off. Re-write what pv_trap_init() has put there.
+         */
+        _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3,
+                  &int80_direct_trap);
+    }
+
     set_nr_sockets();
 
     socket_cpumask = xzalloc_array(cpumask_t *, nr_sockets);
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -189,7 +189,7 @@ ENTRY(compat_post_handle_exception)
 
 /* See lstar_enter for entry register state. */
 ENTRY(cstar_enter)
-        sti
+        /* sti could live here when we don't switch page tables below. */
         CR4_PV32_RESTORE
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
@@ -206,11 +206,12 @@ ENTRY(cstar_enter)
         jz    .Lcstar_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lcstar_cr3_okay:
+        sti
 
-        GET_CURRENT(bx)
+        __GET_CURRENT(bx)
         movq  VCPU_domain(%rbx),%rcx
         cmpb  $0,DOMAIN_is_32bit_pv(%rcx)
         je    switch_to_kernel
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -135,7 +135,7 @@ UNLIKELY_END(exit_cr3)
  * %ss must be saved into the space left by the trampoline.
  */
 ENTRY(lstar_enter)
-        sti
+        /* sti could live here when we don't switch page tables below. */
         movq  8(%rsp),%rax /* Restore %rax. */
         movq  $FLAT_KERNEL_SS,8(%rsp)
         pushq %r11
@@ -151,9 +151,10 @@ ENTRY(lstar_enter)
         jz    .Llstar_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Llstar_cr3_okay:
+        sti
 
         __GET_CURRENT(bx)
         testb $TF_kernel_mode,VCPU_thread_flags(%rbx)
@@ -236,7 +237,7 @@ process_trap:
         jmp  test_all_events
 
 ENTRY(sysenter_entry)
-        sti
+        /* sti could live here when we don't switch page tables below. */
         pushq $FLAT_USER_SS
         pushq $0
         pushfq
@@ -254,9 +255,10 @@ GLOBAL(sysenter_eflags_saved)
         jz    .Lsyse_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lsyse_cr3_okay:
+        sti
 
         __GET_CURRENT(bx)
         cmpb  $0,VCPU_sysenter_disables_events(%rbx)
@@ -300,9 +302,10 @@ ENTRY(int80_direct_trap)
         jz    .Lint80_cr3_okay
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
         neg   %rcx
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lint80_cr3_okay:
+        sti
 
         cmpb  $0,untrusted_msi(%rip)
 UNLIKELY_START(ne, msi_check)
@@ -477,7 +480,7 @@ ENTRY(common_interrupt)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         neg   %rcx
 .Lintr_cr3_load:
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         xor   %ecx, %ecx
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
@@ -515,7 +518,7 @@ GLOBAL(handle_exception)
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         neg   %rcx
 .Lxcpt_cr3_load:
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         xor   %ecx, %ecx
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
@@ -707,7 +710,7 @@ ENTRY(double_fault)
         jns   .Ldblf_cr3_load
         neg   %rbx
 .Ldblf_cr3_load:
-        write_cr3 rbx, rdi, rsi
+        mov   %rbx, %cr3
 .Ldblf_cr3_okay:
 
         movq  %rsp,%rdi
@@ -738,7 +741,7 @@ handle_ist_exception:
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         neg   %rcx
 .List_cr3_load:
-        write_cr3 rcx, rdi, rsi
+        mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
 .List_cr3_okay:
 




* Re: [PATCH] x86: slightly reduce Meltdown band-aid overhead
  2018-01-18 15:39 [PATCH] x86: slightly reduce Meltdown band-aid overhead Jan Beulich
@ 2018-01-23 17:39 ` George Dunlap
  2018-01-29 18:55 ` Andrew Cooper
  1 sibling, 0 replies; 5+ messages in thread
From: George Dunlap @ 2018-01-23 17:39 UTC
  To: Jan Beulich, xen-devel; +Cc: George Dunlap, Andrew Cooper

On 01/18/2018 03:39 PM, Jan Beulich wrote:
> I'm not sure why I didn't do this right away: By avoiding to make any
> of the cloned directmap PTEs global, there's no need to fiddle with
> CR4.PGE on any of the entry paths. Only the exit paths need to flush
> global mappings.
> 
> The reduced flushing, however, implies that we now need to have
> interrupts off on all entry paths until after the page table switch, so
> that flush IPIs can't arrive with the restricted page tables still
> active, but only a non-global flush happening with the CR3 loads. Along
> those lines the "sync" IPI after L4 entry updates now needs to become a
> real (and global) flush IPI, so that inside Xen we'll also pick up such
> changes.
> 
> Take the opportunity and also do a GET_CURRENT() -> __GET_CURRENT()
> transition the original patch missed.
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

FWIW, for me this patch reduces the overhead from 38% to 28%.  Below are
times from a "time make -j 4 xen"

* xpti=off: 63s
* [xpti default]: 87s (+38%)
* + this patch: 81s (+28%)

 -George


* Re: [PATCH] x86: slightly reduce Meltdown band-aid overhead
  2018-01-18 15:39 [PATCH] x86: slightly reduce Meltdown band-aid overhead Jan Beulich
  2018-01-23 17:39 ` George Dunlap
@ 2018-01-29 18:55 ` Andrew Cooper
  2018-01-29 18:56   ` Andrew Cooper
  2018-01-30  7:50   ` Jan Beulich
  1 sibling, 2 replies; 5+ messages in thread
From: Andrew Cooper @ 2018-01-29 18:55 UTC
  To: Jan Beulich, xen-devel; +Cc: George Dunlap

On 18/01/18 15:39, Jan Beulich wrote:
> I'm not sure why I didn't do this right away: By avoiding to make any
> of the cloned directmap PTEs global, there's no need to fiddle with

"avoiding to make" is a very odd way of phrasing this.  Something like
"By avoiding the use of global PTEs in the cloned directmap, " perhaps?

> CR4.PGE on any of the entry paths. Only the exit paths need to flush
> global mappings.
>
> The reduced flushing, however, implies that we now need to have
> interrupts off on all entry paths until after the page table switch, so
> that flush IPIs can't arrive with the restricted page tables still
> active, but only a non-global flush happening with the CR3 loads.

I can't parse this last clause in the context of the sentence, which
looks to finish after "active".

> Along those lines the "sync" IPI after L4 entry updates now needs to become a
> real (and global) flush IPI, so that inside Xen we'll also pick up such
> changes.

The entry paths don't tick the TLB clock, so we are in no worse of a
position than before.  IOW, I don't see why this needs to change to
being a FLUSH_GLOBAL.

>
> Take the opportunity and also do a GET_CURRENT() -> __GET_CURRENT()
> transition the original patch missed.

I've just submitted an alternative to this.

>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> --- a/xen/arch/x86/smpboot.c
> +++ b/xen/arch/x86/smpboot.c
> @@ -726,6 +726,7 @@ static int clone_mapping(const void *ptr
>      }
>  
>      pl1e += l1_table_offset(linear);
> +    flags &= ~_PAGE_GLOBAL;
>  
>      if ( l1e_get_flags(*pl1e) & _PAGE_PRESENT )
>      {
> @@ -1009,8 +1010,17 @@ void __init smp_prepare_cpus(unsigned in
>      if ( rc )
>          panic("Error %d setting up PV root page table\n", rc);
>      if ( per_cpu(root_pgt, 0) )
> +    {
>          get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
>  
> +        /*
> +         * All entry points which may need to switch page tables have to start
> +         * with interrupts off. Re-write what pv_trap_init() has put there.
> +         */
> +        _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3,
> +                  &int80_direct_trap);

The int82 path is also a trap gate.  Given how subtle this is, and how
hard a resulting crash would be to debug, I think it would be better to
unilaterally switch both to being interrupt gates.

Neither are fastpaths, so the single extra sti in their execution paths
will be negligible.
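
For background (architectural behaviour, not Xen code): in an IDT
descriptor the type field distinguishes the two kinds of gate. An
interrupt gate (type 0xE) makes the CPU clear EFLAGS.IF on delivery,
while a trap gate (type 0xF) leaves IF unchanged, which is why only an
interrupt gate guarantees interrupts stay off until the handler's
explicit sti. A minimal sketch of the 64-bit descriptor layout, with
illustrative field names:

    /* Sketch of an x86-64 IDT entry; field names are ours. */
    #include <stdint.h>

    #define GATE_INTERRUPT 0xE  /* CPU clears EFLAGS.IF on entry */
    #define GATE_TRAP      0xF  /* EFLAGS.IF left unchanged      */

    struct idt64_entry {
        uint16_t offset_lo;   /* handler address bits 0..15   */
        uint16_t selector;    /* code segment selector        */
        uint8_t  ist;         /* bits 0..2: IST stack index   */
        uint8_t  type_attr;   /* type | (dpl << 5) | 0x80 (P) */
        uint16_t offset_mid;  /* handler address bits 16..31  */
        uint32_t offset_hi;   /* handler address bits 32..63  */
        uint32_t reserved;
    };

    /* A present, DPL-3 interrupt gate: 0x80 | (3 << 5) | 0xE == 0xEE. */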

~Andrew


* Re: [PATCH] x86: slightly reduce Meltdown band-aid overhead
  2018-01-29 18:55 ` Andrew Cooper
@ 2018-01-29 18:56   ` Andrew Cooper
  2018-01-30  7:50   ` Jan Beulich
  1 sibling, 0 replies; 5+ messages in thread
From: Andrew Cooper @ 2018-01-29 18:56 UTC
  To: Jan Beulich, xen-devel; +Cc: George Dunlap

On 29/01/18 18:55, Andrew Cooper wrote:
> On 18/01/18 15:39, Jan Beulich wrote:
>> Along those lines the "sync" IPI after L4 entry updates now needs to become a
>> real (and global) flush IPI, so that inside Xen we'll also pick up such
>> changes.
> The entry paths don't tick the TLB clock, so we are in no worse of a
> position than before.  IOW, I don't see why this needs to change to
> being a FLUSH_GLOBAL.

Actually, I've just worked out why.  Guest user mappings are still
global, and we do need to flush those.  I think it would be worth
identifying this in the comment.

~Andrew


* Re: [PATCH] x86: slightly reduce Meltdown band-aid overhead
  2018-01-29 18:55 ` Andrew Cooper
  2018-01-29 18:56   ` Andrew Cooper
@ 2018-01-30  7:50   ` Jan Beulich
  1 sibling, 0 replies; 5+ messages in thread
From: Jan Beulich @ 2018-01-30  7:50 UTC
  To: Andrew Cooper; +Cc: George Dunlap, xen-devel

>>> On 29.01.18 at 19:55, <andrew.cooper3@citrix.com> wrote:
> On 18/01/18 15:39, Jan Beulich wrote:
>> I'm not sure why I didn't do this right away: By avoiding to make any
>> of the cloned directmap PTEs global, there's no need to fiddle with
> 
> "avoiding to make" is a very odd way of phrasing this.  Something like
> "By avoiding the use of global PTEs in the cloned directmap, " perhaps?

Changed.

>> CR4.PGE on any of the entry paths. Only the exit paths need to flush
>> global mappings.
>>
>> The reduced flushing, however, implies that we now need to have
>> interrupts off on all entry paths until after the page table switch, so
>> that flush IPIs can't arrive with the restricted page tables still
>> active, but only a non-global flush happening with the CR3 loads.
> 
> I can't parse this last clause in the context of the sentence, which
> looks to finish after "active".

There are two different aspects making it necessary to keep interrupts
off, both of which I try to name in each half sentence. I would
rephrase this to

"The reduced flushing, however, implies that we now need to have
 interrupts off on all entry paths until after the page table switch:
 With the CR3 loads being only non-global flushes, flush IPIs mustn't
 arrive with the restricted page tables still active."

Is that any better?

>> Along those lines the "sync" IPI after L4 entry updates now needs to become a
>> real (and global) flush IPI, so that inside Xen we'll also pick up such
>> changes.
> 
> The entry paths don't tick the TLB clock, so we are in no worse of a
> position than before.  IOW, I don't see why this needs to change to
> being a FLUSH_GLOBAL.

In your later reply you talk about flushing (global) user mappings -
that's another way to put it, just that I didn't want to special case
user mappings here. From the perspective of this change, it doesn't
matter what global mappings there might be. All we care about is
that despite the now non-global flushes we still get rid of global
TLB entries hanging off of changed L4 entries.
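
For reference, a "global" flush on x86 is conventionally implemented
by toggling CR4.PGE, which invalidates global TLB entries as well; a
plain CR3 load spares them. A sketch under that assumption, with
illustrative accessor names rather than the exact Xen helpers:

    /* Sketch: what a FLUSH_TLB_GLOBAL-style flush does that a bare
     * CR3 load does not; write_cr4() is a hypothetical accessor. */
    #define X86_CR4_PGE (1UL << 7)

    extern void write_cr4(unsigned long val);

    static inline void flush_tlb_global_sketch(unsigned long cr4)
    {
        write_cr4(cr4 & ~X86_CR4_PGE);  /* dropping PGE invalidates all */
        write_cr4(cr4);                 /* TLB entries, global included */
    }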

>> @@ -1009,8 +1010,17 @@ void __init smp_prepare_cpus(unsigned in
>>      if ( rc )
>>          panic("Error %d setting up PV root page table\n", rc);
>>      if ( per_cpu(root_pgt, 0) )
>> +    {
>>          get_cpu_info()->pv_cr3 = __pa(per_cpu(root_pgt, 0));
>>  
>> +        /*
>> +         * All entry points which may need to switch page tables have to start
>> +         * with interrupts off. Re-write what pv_trap_init() has put there.
>> +         */
>> +        _set_gate(idt_table + LEGACY_SYSCALL_VECTOR, SYS_DESC_irq_gate, 3,
>> +                  &int80_direct_trap);
> 
> The int82 path is also a trap gate.  Given how subtle this is, and how
> hard a resulting crash would be to debug, I think it would be better to
> unilaterally switch both to being interrupt gates.

The int82 path is of no interest: As the comment says, we care about
interrupts off only on entry paths where we switch page tables (back
from the restricted ones).

> Neither are fastpaths, so the single extra sti in their execution paths
> will be negligible.

How is a system call path not performance relevant? Intel's ORM
doesn't say anything about STI afaics, but I assume it's
microcoded as much as it is for AMD (who explicitly say so).

Jan


