All of lore.kernel.org
 help / color / mirror / Atom feed
From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross <jgross@suse.com>,
	andrew.cooper3@citrix.com, wei.liu2@citrix.com,
	jbeulich@suse.com, dfaggioli@suse.com
Subject: [PATCH v2 5/6] xen/x86: use flag byte for decision whether xen_cr3 is valid
Date: Fri,  2 Mar 2018 09:14:02 +0100	[thread overview]
Message-ID: <20180302081403.16953-6-jgross@suse.com> (raw)
In-Reply-To: <20180302081403.16953-1-jgross@suse.com>

Today cpu_info->xen_cr3 is either 0 to indicate %cr3 doesn't need to
be switched on entry to Xen, or negative to keep the value available to
nested entry paths while telling the exit-to-Xen code not to restore
%cr3, or positive in case %cr3 is to be restored.

Switch to use a flag byte instead of a negative xen_cr3 value in order
to allow %cr3 values with the high bit set in case we want to keep TLB
entries when using the PCID feature.

This reduces the number of branches in interrupt handling and results
in better performance (e.g. parallel make of the Xen hypervisor on my
system was using about 3% less system time).

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/arch/x86/domain.c              |  1 +
 xen/arch/x86/mm.c                  |  1 +
 xen/arch/x86/smpboot.c             |  2 ++
 xen/arch/x86/x86_64/asm-offsets.c  |  1 +
 xen/arch/x86/x86_64/compat/entry.S |  5 ++--
 xen/arch/x86/x86_64/entry.S        | 59 ++++++++++++++++----------------------
 xen/include/asm-x86/current.h      | 11 ++++---
 7 files changed, 39 insertions(+), 41 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 1f8b08ef02..c01ae60296 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1698,6 +1698,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next)
     ASSERT(local_irq_is_enabled());
 
     get_cpu_info()->xen_cr3 = 0;
+    get_cpu_info()->use_xen_cr3 = false;
 
     if ( unlikely(dirty_cpu != cpu) && dirty_cpu != VCPU_CPU_CLEAN )
     {
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 0d0badea86..2d8366a01c 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -520,6 +520,7 @@ void write_ptbase(struct vcpu *v)
     {
         get_cpu_info()->root_pgt_changed = false;
         /* Make sure to clear xen_cr3 before pv_cr3; write_cr3() serializes. */
+        get_cpu_info()->use_xen_cr3 = false;
         get_cpu_info()->xen_cr3 = 0;
         write_cr3(v->arch.cr3);
         get_cpu_info()->pv_cr3 = 0;
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 60604f4535..1b665991bd 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -330,6 +330,7 @@ void start_secondary(void *unused)
      */
     spin_debug_disable();
 
+    get_cpu_info()->use_xen_cr3 = false;
     get_cpu_info()->xen_cr3 = 0;
     get_cpu_info()->pv_cr3 = 0;
 
@@ -1129,6 +1130,7 @@ void __init smp_prepare_boot_cpu(void)
     per_cpu(scratch_cpumask, cpu) = &scratch_cpu0mask;
 #endif
 
+    get_cpu_info()->use_xen_cr3 = false;
     get_cpu_info()->xen_cr3 = 0;
     get_cpu_info()->pv_cr3 = 0;
 }
diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
index c9225b06c1..d276f70a75 100644
--- a/xen/arch/x86/x86_64/asm-offsets.c
+++ b/xen/arch/x86/x86_64/asm-offsets.c
@@ -145,6 +145,7 @@ void __dummy__(void)
     OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
     OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info);
     OFFSET(CPUINFO_root_pgt_changed, struct cpu_info, root_pgt_changed);
+    OFFSET(CPUINFO_use_xen_cr3, struct cpu_info, use_xen_cr3);
     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
     BLANK();
 
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index 27cdf244e6..74055eade8 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -218,10 +218,9 @@ ENTRY(cstar_enter)
         GET_STACK_END(bx)
 .Lcstar_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
-        neg   %rcx
+        test  %rcx, %rcx
         jz    .Lcstar_cr3_okay
-        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
-        neg   %rcx
+        movb  $0, STACK_CPUINFO_FIELD(use_xen_cr3)(%rbx)
         mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lcstar_cr3_okay:
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index a8d38e7eb2..2c9ce8d821 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -74,6 +74,7 @@ restore_all_guest:
         rep movsq
 .Lrag_copy_done:
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
+        movb  $1, STACK_CPUINFO_FIELD(use_xen_cr3)(%rdx)
         mov   %rax, %cr3
 .Lrag_cr3_end:
         ALTERNATIVE_NOP .Lrag_cr3_start, .Lrag_cr3_end, X86_FEATURE_NO_XPTI
@@ -123,14 +124,9 @@ restore_all_xen:
          * case we return to late PV exit code (from an NMI or #MC).
          */
         GET_STACK_END(bx)
-        mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rdx
+        cmpb  $0, STACK_CPUINFO_FIELD(use_xen_cr3)(%rbx)
+UNLIKELY_START(ne, exit_cr3)
         mov   STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax
-        test  %rdx, %rdx
-        /*
-         * Ideally the condition would be "nsz", but such doesn't exist,
-         * so "g" will have to do.
-         */
-UNLIKELY_START(g, exit_cr3)
         mov   %rax, %cr3
 UNLIKELY_END(exit_cr3)
 
@@ -173,10 +169,9 @@ ENTRY(lstar_enter)
         GET_STACK_END(bx)
 .Llstar_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
-        neg   %rcx
+        test  %rcx, %rcx
         jz    .Llstar_cr3_okay
-        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
-        neg   %rcx
+        movb  $0, STACK_CPUINFO_FIELD(use_xen_cr3)(%rbx)
         mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Llstar_cr3_okay:
@@ -283,10 +278,9 @@ GLOBAL(sysenter_eflags_saved)
         GET_STACK_END(bx)
 .Lsyse_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
-        neg   %rcx
+        test  %rcx, %rcx
         jz    .Lsyse_cr3_okay
-        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
-        neg   %rcx
+        movb  $0, STACK_CPUINFO_FIELD(use_xen_cr3)(%rbx)
         mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lsyse_cr3_okay:
@@ -336,10 +330,9 @@ ENTRY(int80_direct_trap)
         GET_STACK_END(bx)
 .Lint80_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%rbx), %rcx
-        neg   %rcx
+        test  %rcx, %rcx
         jz    .Lint80_cr3_okay
-        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
-        neg   %rcx
+        movb  $0, STACK_CPUINFO_FIELD(use_xen_cr3)(%rbx)
         mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%rbx)
 .Lint80_cr3_okay:
@@ -523,18 +516,17 @@ ENTRY(common_interrupt)
 
 .Lintr_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
+        mov   STACK_CPUINFO_FIELD(use_xen_cr3)(%r14), %bl
         mov   %rcx, %r15
-        neg   %rcx
+        test  %rcx, %rcx
         jz    .Lintr_cr3_okay
-        jns   .Lintr_cr3_load
-        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
-        neg   %rcx
-.Lintr_cr3_load:
+        movb  $0, STACK_CPUINFO_FIELD(use_xen_cr3)(%r14)
         mov   %rcx, %cr3
         xor   %ecx, %ecx
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
         cmovnz %rcx, %r15
+        cmovnz %cx, %bx
 .Lintr_cr3_okay:
 
         CR4_PV32_RESTORE
@@ -542,6 +534,7 @@ ENTRY(common_interrupt)
         callq do_IRQ
 .Lintr_cr3_restore:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+        mov   %bl, STACK_CPUINFO_FIELD(use_xen_cr3)(%r14)
 .Lintr_cr3_end:
         jmp ret_from_intr
 
@@ -571,18 +564,17 @@ GLOBAL(handle_exception)
 
 .Lxcpt_cr3_start:
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
+        mov   STACK_CPUINFO_FIELD(use_xen_cr3)(%r14), %r13b
         mov   %rcx, %r15
-        neg   %rcx
+        test  %rcx, %rcx
         jz    .Lxcpt_cr3_okay
-        jns   .Lxcpt_cr3_load
-        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
-        neg   %rcx
-.Lxcpt_cr3_load:
+        movb  $0, STACK_CPUINFO_FIELD(use_xen_cr3)(%r14)
         mov   %rcx, %cr3
         xor   %ecx, %ecx
         mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         testb $3, UREGS_cs(%rsp)
         cmovnz %rcx, %r15
+        cmovnz %rcx, %r13
 .Lxcpt_cr3_okay:
 
 handle_exception_saved:
@@ -652,6 +644,7 @@ handle_exception_saved:
         INDIRECT_CALL %rdx
 .Lxcpt_cr3_restore1:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+        mov   %r13b, STACK_CPUINFO_FIELD(use_xen_cr3)(%r14)
 .Lxcpt_cr3_end1:
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
@@ -687,6 +680,7 @@ exception_with_ints_disabled:
         movq  %rax,UREGS_kernel_sizeof(%rsp)
 .Lxcpt_cr3_restore2:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+        mov   %r13b, STACK_CPUINFO_FIELD(use_xen_cr3)(%r14)
 .Lxcpt_cr3_end2:
         jmp   restore_all_xen           # return to fixup code
 
@@ -781,9 +775,6 @@ ENTRY(double_fault)
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx
         test  %rbx, %rbx
         jz    .Ldblf_cr3_okay
-        jns   .Ldblf_cr3_load
-        neg   %rbx
-.Ldblf_cr3_load:
         mov   %rbx, %cr3
 .Ldblf_cr3_okay:
 
@@ -812,13 +803,11 @@ handle_ist_exception:
         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
 
         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx
+        mov   STACK_CPUINFO_FIELD(use_xen_cr3)(%r14), %bl
         mov   %rcx, %r15
-        neg   %rcx
+        test  %rcx, %rcx
         jz    .List_cr3_okay
-        jns   .List_cr3_load
-        mov   %rcx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
-        neg   %rcx
-.List_cr3_load:
+        movb  $0, STACK_CPUINFO_FIELD(use_xen_cr3)(%r14)
         mov   %rcx, %cr3
         movq  $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
 .List_cr3_okay:
@@ -831,6 +820,7 @@ handle_ist_exception:
          * and copy the context to stack bottom.
          */
         xor   %r15, %r15
+        xor   %bl, %bl
         GET_CPUINFO_FIELD(guest_cpu_user_regs,di)
         movq  %rsp,%rsi
         movl  $UREGS_kernel_sizeof/8,%ecx
@@ -842,6 +832,7 @@ handle_ist_exception:
         mov   (%rdx, %rax, 8), %rdx
         INDIRECT_CALL %rdx
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
+        mov   %bl, STACK_CPUINFO_FIELD(use_xen_cr3)(%r14)
         cmpb  $TRAP_nmi,UREGS_entry_vector(%rsp)
         jne   ret_from_intr
 
diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
index d5236c82de..34a9512d93 100644
--- a/xen/include/asm-x86/current.h
+++ b/xen/include/asm-x86/current.h
@@ -47,10 +47,7 @@ struct cpu_info {
      * context is being entered. A value of zero indicates no setting of CR3
      * is to be performed.
      * The former is the value to restore when re-entering Xen, if any. IOW
-     * its value being zero means there's nothing to restore. However, its
-     * value can also be negative, indicating to the exit-to-Xen code that
-     * restoring is not necessary, but allowing any nested entry code paths
-     * to still know the value to put back into CR3.
+     * its value being zero means there's nothing to restore.
      */
     unsigned long xen_cr3;
     unsigned long pv_cr3;
@@ -68,6 +65,12 @@ struct cpu_info {
      */
     bool         root_pgt_changed;
 
+    /*
+     * use_xen_cr3 is set together with xen_cr3 when exiting to a guest. While
+     * set, returning to Xen context reloads CR3 from pv_cr3 (restore_all_xen).
+     */
+    bool         use_xen_cr3;
+
     unsigned long __pad;
     /* get_stack_bottom() must be 16-byte aligned */
 };
-- 
2.13.6


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2018-03-02  8:14 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-02  8:13 [PATCH v2 0/6] xen/x86: various XPTI speedups Juergen Gross
2018-03-02  8:13 ` [PATCH v2 1/6] x86/xpti: avoid copying L4 page table contents when possible Juergen Gross
2018-03-05 16:43   ` Jan Beulich
2018-03-08 11:59     ` Juergen Gross
2018-03-08 12:47       ` Jan Beulich
     [not found]       ` <5AA13EEA02000078001AFCAF@suse.com>
2018-03-08 13:03         ` Juergen Gross
     [not found]   ` <5A9D81DC02000078001AEB68@suse.com>
2018-03-06  7:01     ` Juergen Gross
2018-03-06  7:58       ` Jan Beulich
     [not found]       ` <5A9E583002000078001AED3A@suse.com>
2018-03-06  8:06         ` Juergen Gross
2018-03-06  8:17           ` Jan Beulich
2018-03-02  8:13 ` [PATCH v2 2/6] x86/xpti: don't flush TLB twice when switching to 64-bit pv context Juergen Gross
2018-03-05 16:49   ` Jan Beulich
     [not found]   ` <5A9D831F02000078001AEB7E@suse.com>
2018-03-06  7:02     ` Juergen Gross
2018-03-02  8:14 ` [PATCH v2 3/6] xen/x86: support per-domain flag for xpti Juergen Gross
2018-03-08 10:17   ` Jan Beulich
     [not found]   ` <5AA11BDE02000078001AFB92@suse.com>
2018-03-08 11:30     ` Juergen Gross
2018-03-08 12:49       ` Jan Beulich
     [not found]       ` <5AA13F7D02000078001AFCB3@suse.com>
2018-03-08 13:13         ` Juergen Gross
2018-03-02  8:14 ` [PATCH v2 4/6] xen/x86: disable global pages for domains with XPTI active Juergen Gross
2018-03-02 11:03   ` Wei Liu
2018-03-02 11:30     ` Juergen Gross
2018-03-08 13:38   ` Jan Beulich
2018-03-09  3:01     ` Tian, Kevin
2018-03-09  5:23     ` Tian, Kevin
2018-03-09  8:34       ` Jan Beulich
     [not found]       ` <5AA2551002000078001B0116@suse.com>
2018-03-09  8:42         ` Juergen Gross
     [not found]   ` <5AA14AF302000078001AFD30@suse.com>
2018-03-08 14:05     ` Juergen Gross
2018-03-08 14:33       ` Jan Beulich
     [not found]       ` <5AA157E002000078001AFDA4@suse.com>
2018-03-08 14:39         ` Juergen Gross
2018-03-08 15:06   ` Jan Beulich
2018-03-09 14:40     ` Juergen Gross
2018-03-09 15:30       ` Jan Beulich
2018-03-02  8:14 ` Juergen Gross [this message]
2018-03-08 14:24   ` [PATCH v2 5/6] xen/x86: use flag byte for decision whether xen_cr3 is valid Jan Beulich
     [not found]   ` <5AA155BE02000078001AFD89@suse.com>
2018-03-08 14:28     ` Juergen Gross
2018-03-02  8:14 ` [PATCH v2 6/6] xen/x86: use PCID feature for XPTI Juergen Gross
2018-03-08 15:27   ` Jan Beulich
2018-03-05 16:20 ` [PATCH v2 0/6] xen/x86: various XPTI speedups Dario Faggioli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180302081403.16953-6-jgross@suse.com \
    --to=jgross@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dfaggioli@suse.com \
    --cc=jbeulich@suse.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.