* [PATCH 0/5] x86/pv: Minor perf improvements in segment handling
@ 2020-09-09  9:59 Andrew Cooper
  2020-09-09  9:59 ` [PATCH 1/5] x86/asm: Rename FS/GS base helpers Andrew Cooper
                   ` (4 more replies)
  0 siblings, 5 replies; 18+ messages in thread
From: Andrew Cooper @ 2020-09-09  9:59 UTC (permalink / raw)
  To: Xen-devel
  Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné,
	Wei Liu, Jun Nakajima, Kevin Tian

This is follow-on work from the fixes for Linux 5.9 with FSGSBASE.

Andrew Cooper (5):
  x86/asm: Rename FS/GS base helpers
  x86/asm: Split __wr{fs,gs}base() out of write_{fs,gs}_base()
  x86/pv: Optimise prefetching in svm_load_segs()
  x86/pv: Optimise to the segment context switching paths
  x86/pv: Simplify emulation for the 64bit base MSRs

 xen/arch/x86/domain.c             | 72 +++++++++++++++++++++++++++------------
 xen/arch/x86/hvm/svm/svm.c        | 43 +++++++++++------------
 xen/arch/x86/hvm/vmx/vmx.c        |  8 ++---
 xen/arch/x86/pv/domain.c          |  2 +-
 xen/arch/x86/pv/emul-priv-op.c    | 42 ++++++++++++-----------
 xen/arch/x86/x86_64/mm.c          |  8 ++---
 xen/arch/x86/x86_64/traps.c       |  6 ++--
 xen/include/asm-x86/hvm/svm/svm.h |  5 +--
 xen/include/asm-x86/msr.h         | 42 ++++++++++++++---------
 9 files changed, 135 insertions(+), 93 deletions(-)

-- 
2.11.0



^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/5] x86/asm: Rename FS/GS base helpers
  2020-09-09  9:59 [PATCH 0/5] x86/pv: Minor perf improvements in segment handling Andrew Cooper
@ 2020-09-09  9:59 ` Andrew Cooper
  2020-09-10 14:45   ` Jan Beulich
  2020-09-15  2:50   ` Tian, Kevin
  2020-09-09  9:59 ` [PATCH 2/5] x86/asm: Split __wr{fs, gs}base() out of write_{fs, gs}_base() Andrew Cooper
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 18+ messages in thread
From: Andrew Cooper @ 2020-09-09  9:59 UTC (permalink / raw)
  To: Xen-devel
  Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné,
	Wei Liu, Jun Nakajima, Kevin Tian

They are currently named after the FSGSBASE instructions, but are not thin
wrappers around said instructions, and therefore do not accurately reflect the
logic they perform, especially when it comes to functioning safely in non
FSGSBASE context.

Rename them to {read,write}_{fs,gs}_base() to avoid confusion.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
CC: Jun Nakajima <jun.nakajima@intel.com>
CC: Kevin Tian <kevin.tian@intel.com>
---
 xen/arch/x86/domain.c          | 10 +++++-----
 xen/arch/x86/hvm/vmx/vmx.c     |  8 ++++----
 xen/arch/x86/pv/domain.c       |  2 +-
 xen/arch/x86/pv/emul-priv-op.c | 14 +++++++-------
 xen/arch/x86/x86_64/mm.c       |  8 ++++----
 xen/arch/x86/x86_64/traps.c    |  6 +++---
 xen/include/asm-x86/msr.h      | 12 ++++++------
 7 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e8e91cf080..2271bee36a 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1581,9 +1581,9 @@ static void load_segments(struct vcpu *n)
 
     if ( !fs_gs_done && !compat )
     {
-        wrfsbase(n->arch.pv.fs_base);
-        wrgsshadow(n->arch.pv.gs_base_kernel);
-        wrgsbase(n->arch.pv.gs_base_user);
+        write_fs_base(n->arch.pv.fs_base);
+        write_gs_shadow(n->arch.pv.gs_base_kernel);
+        write_gs_base(n->arch.pv.gs_base_user);
 
         /* If in kernel mode then switch the GS bases around. */
         if ( (n->arch.flags & TF_kernel_mode) )
@@ -1710,9 +1710,9 @@ static void save_segments(struct vcpu *v)
 
     if ( !is_pv_32bit_vcpu(v) )
     {
-        unsigned long gs_base = rdgsbase();
+        unsigned long gs_base = read_gs_base();
 
-        v->arch.pv.fs_base = rdfsbase();
+        v->arch.pv.fs_base = read_fs_base();
         if ( v->arch.flags & TF_kernel_mode )
             v->arch.pv.gs_base_kernel = gs_base;
         else
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c4b40bf3cb..d26e102970 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -512,12 +512,12 @@ static void vmx_save_guest_msrs(struct vcpu *v)
      * We cannot cache SHADOW_GS_BASE while the VCPU runs, as it can
      * be updated at any time via SWAPGS, which we cannot trap.
      */
-    v->arch.hvm.vmx.shadow_gs = rdgsshadow();
+    v->arch.hvm.vmx.shadow_gs = read_gs_shadow();
 }
 
 static void vmx_restore_guest_msrs(struct vcpu *v)
 {
-    wrgsshadow(v->arch.hvm.vmx.shadow_gs);
+    write_gs_shadow(v->arch.hvm.vmx.shadow_gs);
     wrmsrl(MSR_STAR,           v->arch.hvm.vmx.star);
     wrmsrl(MSR_LSTAR,          v->arch.hvm.vmx.lstar);
     wrmsrl(MSR_SYSCALL_MASK,   v->arch.hvm.vmx.sfmask);
@@ -2958,7 +2958,7 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
         break;
 
     case MSR_SHADOW_GS_BASE:
-        *msr_content = rdgsshadow();
+        *msr_content = read_gs_shadow();
         break;
 
     case MSR_STAR:
@@ -3190,7 +3190,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
         else if ( msr == MSR_GS_BASE )
             __vmwrite(GUEST_GS_BASE, msr_content);
         else
-            wrgsshadow(msr_content);
+            write_gs_shadow(msr_content);
 
         break;
 
diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
index 44e4ea2582..663e76c773 100644
--- a/xen/arch/x86/pv/domain.c
+++ b/xen/arch/x86/pv/domain.c
@@ -452,7 +452,7 @@ void toggle_guest_mode(struct vcpu *v)
      * Update the cached value of the GS base about to become inactive, as a
      * subsequent context switch won't bother re-reading it.
      */
-    gs_base = rdgsbase();
+    gs_base = read_gs_base();
     if ( v->arch.flags & TF_kernel_mode )
         v->arch.pv.gs_base_kernel = gs_base;
     else
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index a192160f84..9dd1d59423 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -511,10 +511,10 @@ static int read_segment(enum x86_segment seg,
             reg->base = 0;
             break;
         case x86_seg_fs:
-            reg->base = rdfsbase();
+            reg->base = read_fs_base();
             break;
         case x86_seg_gs:
-            reg->base = rdgsbase();
+            reg->base = read_gs_base();
             break;
         }
 
@@ -871,13 +871,13 @@ static int read_msr(unsigned int reg, uint64_t *val,
     case MSR_FS_BASE:
         if ( is_pv_32bit_domain(currd) )
             break;
-        *val = rdfsbase();
+        *val = read_fs_base();
         return X86EMUL_OKAY;
 
     case MSR_GS_BASE:
         if ( is_pv_32bit_domain(currd) )
             break;
-        *val = rdgsbase();
+        *val = read_gs_base();
         return X86EMUL_OKAY;
 
     case MSR_SHADOW_GS_BASE:
@@ -993,19 +993,19 @@ static int write_msr(unsigned int reg, uint64_t val,
     case MSR_FS_BASE:
         if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
             break;
-        wrfsbase(val);
+        write_fs_base(val);
         return X86EMUL_OKAY;
 
     case MSR_GS_BASE:
         if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
             break;
-        wrgsbase(val);
+        write_gs_base(val);
         return X86EMUL_OKAY;
 
     case MSR_SHADOW_GS_BASE:
         if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
             break;
-        wrgsshadow(val);
+        write_gs_shadow(val);
         curr->arch.pv.gs_base_user = val;
         return X86EMUL_OKAY;
 
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index b69cf2dc4f..0d11a9f500 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1030,7 +1030,7 @@ long do_set_segment_base(unsigned int which, unsigned long base)
     {
     case SEGBASE_FS:
         if ( is_canonical_address(base) )
-            wrfsbase(base);
+            write_fs_base(base);
         else
             ret = -EINVAL;
         break;
@@ -1038,7 +1038,7 @@ long do_set_segment_base(unsigned int which, unsigned long base)
     case SEGBASE_GS_USER:
         if ( is_canonical_address(base) )
         {
-            wrgsshadow(base);
+            write_gs_shadow(base);
             v->arch.pv.gs_base_user = base;
         }
         else
@@ -1047,7 +1047,7 @@ long do_set_segment_base(unsigned int which, unsigned long base)
 
     case SEGBASE_GS_KERNEL:
         if ( is_canonical_address(base) )
-            wrgsbase(base);
+            write_gs_base(base);
         else
             ret = -EINVAL;
         break;
@@ -1096,7 +1096,7 @@ long do_set_segment_base(unsigned int which, unsigned long base)
                        : [flat] "r" (FLAT_USER_DS32) );
 
         /* Update the cache of the inactive base, as read from the GDT/LDT. */
-        v->arch.pv.gs_base_user = rdgsbase();
+        v->arch.pv.gs_base_user = read_gs_base();
 
         asm volatile ( safe_swapgs );
         break;
diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index 93af0c5e87..4f262122b7 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -47,9 +47,9 @@ static void read_registers(struct cpu_user_regs *regs, unsigned long crs[8])
     regs->es = read_sreg(es);
     regs->fs = read_sreg(fs);
     regs->gs = read_sreg(gs);
-    crs[5] = rdfsbase();
-    crs[6] = rdgsbase();
-    crs[7] = rdgsshadow();
+    crs[5] = read_fs_base();
+    crs[6] = read_gs_base();
+    crs[7] = read_gs_shadow();
 }
 
 static void _show_registers(
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index 5c44c79600..5e141ac5a5 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -156,7 +156,7 @@ static inline unsigned long __rdgsbase(void)
     return base;
 }
 
-static inline unsigned long rdfsbase(void)
+static inline unsigned long read_fs_base(void)
 {
     unsigned long base;
 
@@ -168,7 +168,7 @@ static inline unsigned long rdfsbase(void)
     return base;
 }
 
-static inline unsigned long rdgsbase(void)
+static inline unsigned long read_gs_base(void)
 {
     unsigned long base;
 
@@ -180,7 +180,7 @@ static inline unsigned long rdgsbase(void)
     return base;
 }
 
-static inline unsigned long rdgsshadow(void)
+static inline unsigned long read_gs_shadow(void)
 {
     unsigned long base;
 
@@ -196,7 +196,7 @@ static inline unsigned long rdgsshadow(void)
     return base;
 }
 
-static inline void wrfsbase(unsigned long base)
+static inline void write_fs_base(unsigned long base)
 {
     if ( read_cr4() & X86_CR4_FSGSBASE )
 #ifdef HAVE_AS_FSGSBASE
@@ -208,7 +208,7 @@ static inline void wrfsbase(unsigned long base)
         wrmsrl(MSR_FS_BASE, base);
 }
 
-static inline void wrgsbase(unsigned long base)
+static inline void write_gs_base(unsigned long base)
 {
     if ( read_cr4() & X86_CR4_FSGSBASE )
 #ifdef HAVE_AS_FSGSBASE
@@ -220,7 +220,7 @@ static inline void wrgsbase(unsigned long base)
         wrmsrl(MSR_GS_BASE, base);
 }
 
-static inline void wrgsshadow(unsigned long base)
+static inline void write_gs_shadow(unsigned long base)
 {
     if ( read_cr4() & X86_CR4_FSGSBASE )
     {
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/5] x86/asm: Split __wr{fs, gs}base() out of write_{fs, gs}_base()
  2020-09-09  9:59 [PATCH 0/5] x86/pv: Minor perf improvements in segment handling Andrew Cooper
  2020-09-09  9:59 ` [PATCH 1/5] x86/asm: Rename FS/GS base helpers Andrew Cooper
@ 2020-09-09  9:59 ` Andrew Cooper
  2020-09-10 14:47   ` [PATCH 2/5] x86/asm: Split __wr{fs,gs}base() out of write_{fs,gs}_base() Jan Beulich
  2020-09-09  9:59 ` [PATCH 3/5] x86/pv: Optimise prefetching in svm_load_segs() Andrew Cooper
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2020-09-09  9:59 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

To match the read side which is already split out.  A future change will want
to use them.

No functional change.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 xen/include/asm-x86/msr.h | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index 5e141ac5a5..16f95e7344 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -156,6 +156,24 @@ static inline unsigned long __rdgsbase(void)
     return base;
 }
 
+static inline void __wrfsbase(unsigned long base)
+{
+#ifdef HAVE_AS_FSGSBASE
+    asm volatile ( "wrfsbase %0" :: "r" (base) );
+#else
+    asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd0" :: "a" (base) );
+#endif
+}
+
+static inline void __wrgsbase(unsigned long base)
+{
+#ifdef HAVE_AS_FSGSBASE
+    asm volatile ( "wrgsbase %0" :: "r" (base) );
+#else
+    asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8" :: "a" (base) );
+#endif
+}
+
 static inline unsigned long read_fs_base(void)
 {
     unsigned long base;
@@ -199,11 +217,7 @@ static inline unsigned long read_gs_shadow(void)
 static inline void write_fs_base(unsigned long base)
 {
     if ( read_cr4() & X86_CR4_FSGSBASE )
-#ifdef HAVE_AS_FSGSBASE
-        asm volatile ( "wrfsbase %0" :: "r" (base) );
-#else
-        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd0" :: "a" (base) );
-#endif
+        __wrfsbase(base);
     else
         wrmsrl(MSR_FS_BASE, base);
 }
@@ -211,11 +225,7 @@ static inline void write_fs_base(unsigned long base)
 static inline void write_gs_base(unsigned long base)
 {
     if ( read_cr4() & X86_CR4_FSGSBASE )
-#ifdef HAVE_AS_FSGSBASE
-        asm volatile ( "wrgsbase %0" :: "r" (base) );
-#else
-        asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8" :: "a" (base) );
-#endif
+        __wrgsbase(base);
     else
         wrmsrl(MSR_GS_BASE, base);
 }
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/5] x86/pv: Optimise prefetching in svm_load_segs()
  2020-09-09  9:59 [PATCH 0/5] x86/pv: Minor perf improvements in segment handling Andrew Cooper
  2020-09-09  9:59 ` [PATCH 1/5] x86/asm: Rename FS/GS base helpers Andrew Cooper
  2020-09-09  9:59 ` [PATCH 2/5] x86/asm: Split __wr{fs, gs}base() out of write_{fs, gs}_base() Andrew Cooper
@ 2020-09-09  9:59 ` Andrew Cooper
  2020-09-10 14:57   ` Jan Beulich
  2020-09-09  9:59 ` [PATCH 4/5] x86/pv: Optimise to the segment context switching paths Andrew Cooper
  2020-09-09  9:59 ` [PATCH 5/5] x86/pv: Simplify emulation for the 64bit base MSRs Andrew Cooper
  4 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2020-09-09  9:59 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

Split into two functions.  Passing a load of zeros in results in somewhat poor
register scheduling in __context_switch().

Update the prefetching comment to note that the main point is the TLB fill.

Reorder the writes in svm_load_segs() to access the VMCB fields in ascending
order, which gets better next-line prefetch behaviour out of hardware.  Update
the prefetch instruction to match.

The net delta is:

  add/remove: 1/0 grow/shrink: 0/2 up/down: 38/-39 (-1)
  Function                                     old     new   delta
  svm_load_segs_prefetch                         -      38     +38
  __context_switch                             967     951     -16
  svm_load_segs                                291     268     -23

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 xen/arch/x86/domain.c             |  2 +-
 xen/arch/x86/hvm/svm/svm.c        | 43 ++++++++++++++++++++-------------------
 xen/include/asm-x86/hvm/svm/svm.h |  5 +++--
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 2271bee36a..0b0e3f8294 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1928,7 +1928,7 @@ static void __context_switch(void)
     /* Prefetch the VMCB if we expect to use it later in the context switch */
     if ( cpu_has_svm && is_pv_domain(nd) && !is_pv_32bit_domain(nd) &&
          !is_idle_domain(nd) )
-        svm_load_segs(0, 0, 0, 0, 0);
+        svm_load_segs_prefetch();
 #endif
 
     if ( need_full_gdt(nd) && !per_cpu(full_gdt_loaded, cpu) )
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 23b2a2aa17..9a2aca7770 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1520,6 +1520,19 @@ static void svm_init_erratum_383(const struct cpuinfo_x86 *c)
 }
 
 #ifdef CONFIG_PV
+void svm_load_segs_prefetch(void)
+{
+    struct vmcb_struct *vmcb = this_cpu(host_vmcb_va);
+
+    if ( vmcb )
+        /*
+         * The main reason for this prefetch is for the TLB fill.  Use the
+         * opporunity to fetch the lowest address used, to get the best
+         * behaviour out of hardwares next-line prefetcher.
+         */
+        prefetchw(&vmcb->fs);
+}
+
 bool svm_load_segs(unsigned int ldt_ents, unsigned long ldt_base,
                    unsigned long fs_base, unsigned long gs_base,
                    unsigned long gs_shadow)
@@ -1530,17 +1543,15 @@ bool svm_load_segs(unsigned int ldt_ents, unsigned long ldt_base,
     if ( unlikely(!vmcb) )
         return false;
 
-    if ( !ldt_base )
-    {
-        /*
-         * The actual structure field used here was arbitrarily chosen.
-         * Empirically it doesn't seem to matter much which element is used,
-         * and a clear explanation of the otherwise poor performance has not
-         * been found/provided so far.
-         */
-        prefetchw(&vmcb->ldtr);
-        return true;
-    }
+    vmcb->fs.sel = 0;
+    vmcb->fs.attr = 0;
+    vmcb->fs.limit = 0;
+    vmcb->fs.base = fs_base;
+
+    vmcb->gs.sel = 0;
+    vmcb->gs.attr = 0;
+    vmcb->gs.limit = 0;
+    vmcb->gs.base = gs_base;
 
     if ( likely(!ldt_ents) )
         memset(&vmcb->ldtr, 0, sizeof(vmcb->ldtr));
@@ -1558,16 +1569,6 @@ bool svm_load_segs(unsigned int ldt_ents, unsigned long ldt_base,
         vmcb->ldtr.base = ldt_base;
     }
 
-    vmcb->fs.sel = 0;
-    vmcb->fs.attr = 0;
-    vmcb->fs.limit = 0;
-    vmcb->fs.base = fs_base;
-
-    vmcb->gs.sel = 0;
-    vmcb->gs.attr = 0;
-    vmcb->gs.limit = 0;
-    vmcb->gs.base = gs_base;
-
     vmcb->kerngsbase = gs_shadow;
 
     svm_vmload_pa(per_cpu(host_vmcb, cpu));
diff --git a/xen/include/asm-x86/hvm/svm/svm.h b/xen/include/asm-x86/hvm/svm/svm.h
index 2310878e41..faeca40174 100644
--- a/xen/include/asm-x86/hvm/svm/svm.h
+++ b/xen/include/asm-x86/hvm/svm/svm.h
@@ -50,12 +50,13 @@ void __update_guest_eip(struct cpu_user_regs *regs, unsigned int inst_len);
 void svm_update_guest_cr(struct vcpu *, unsigned int cr, unsigned int flags);
 
 /*
- * PV context switch helper. Calls with zero ldt_base request a prefetch of
- * the VMCB area to be loaded from, instead of an actual load of state.
+ * PV context switch helpers.  Prefetching the VMCB area itself has been shown
+ * to be useful for performance.
  *
  * Must only be used for NUL FS/GS, as the segment attributes/limits are not
  * read from the GDT/LDT.
  */
+void svm_load_segs_prefetch(void);
 bool svm_load_segs(unsigned int ldt_ents, unsigned long ldt_base,
                    unsigned long fs_base, unsigned long gs_base,
                    unsigned long gs_shadow);
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/5] x86/pv: Optimise to the segment context switching paths
  2020-09-09  9:59 [PATCH 0/5] x86/pv: Minor perf improvements in segment handling Andrew Cooper
                   ` (2 preceding siblings ...)
  2020-09-09  9:59 ` [PATCH 3/5] x86/pv: Optimise prefetching in svm_load_segs() Andrew Cooper
@ 2020-09-09  9:59 ` Andrew Cooper
  2020-09-11  9:49   ` Jan Beulich
  2020-09-09  9:59 ` [PATCH 5/5] x86/pv: Simplify emulation for the 64bit base MSRs Andrew Cooper
  4 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2020-09-09  9:59 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

Save the segment selectors with explicit asm, rather than with read_sreg().
This permits the use of MOV's m16 encoding, which avoids indirecting the
selector value through a register.

For {save,load}_segments(), opencode the fs/gs helpers, as the optimiser is
unable to rearrange the logic down to a single X86_CR4_FSGSBASE check.  This
removes several jumps and creates bigger basic blocks.

In load_segments(), optimise GS base handling substantially.  The call to
svm_load_segs() already needs gsb/gss the correct way around, so hoist the
logic for the later path to use it as well.  Swapping the inputs in GPRs is
far more efficient than using SWAPGS.

Previously, there was optionally one SWAPGS from the user/kernel mode check,
two SWAPGS's in write_gs_shadow() and two WRGSBASE's.  Updates to GS (4 or 5
here) in quick succession stall all contemporary pipelines repeatedly.  (Intel
Core/Xeon pipelines have segment register renaming[1], so can continue to
speculatively execute with one GS update in flight.  Other pipelines cannot
have two updates in flight concurrently, and must stall dispatch of the second
until the first has retired.)

Rewrite the logic to have exactly two WRGSBASEs and one SWAPGS, which removes
two stalls on all contemporary processors.

Although modest, the resulting delta is:

  add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-106 (-106)
  Function                                     old     new   delta
  paravirt_ctxt_switch_from                    235     198     -37
  context_switch                              3582    3513     -69

in a common path.

[1] https://software.intel.com/security-software-guidance/insights/deep-dive-intel-analysis-speculative-behavior-swapgs-and-segment-registers

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 xen/arch/x86/domain.c | 70 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 21 deletions(-)

diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 0b0e3f8294..ab71b9f79c 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1537,6 +1537,7 @@ arch_do_vcpu_op(
 static void load_segments(struct vcpu *n)
 {
     struct cpu_user_regs *uregs = &n->arch.user_regs;
+    unsigned long gsb = 0, gss = 0;
     bool compat = is_pv_32bit_vcpu(n);
     bool all_segs_okay = true, fs_gs_done = false;
 
@@ -1556,18 +1557,24 @@ static void load_segments(struct vcpu *n)
                    : [ok] "+r" (all_segs_okay)          \
                    : [_val] "rm" (val) )
 
-#ifdef CONFIG_HVM
-    if ( cpu_has_svm && !compat && (uregs->fs | uregs->gs) <= 3 )
+    if ( !compat )
     {
-        unsigned long gsb = n->arch.flags & TF_kernel_mode
-            ? n->arch.pv.gs_base_kernel : n->arch.pv.gs_base_user;
-        unsigned long gss = n->arch.flags & TF_kernel_mode
-            ? n->arch.pv.gs_base_user : n->arch.pv.gs_base_kernel;
+        gsb = n->arch.pv.gs_base_kernel;
+        gss = n->arch.pv.gs_base_user;
+
+        /*
+         * Figure out which way around gsb/gss want to be.  gsb needs to be
+         * the active context, and gss needs to be the inactive context.
+         */
+        if ( !(n->arch.flags & TF_kernel_mode) )
+            SWAP(gsb, gss);
 
-        fs_gs_done = svm_load_segs(n->arch.pv.ldt_ents, LDT_VIRT_START(n),
-                                   n->arch.pv.fs_base, gsb, gss);
+        if ( IS_ENABLED(CONFIG_HVM) && cpu_has_svm &&
+             (uregs->fs | uregs->gs) <= 3 )
+            fs_gs_done = svm_load_segs(n->arch.pv.ldt_ents, LDT_VIRT_START(n),
+                                       n->arch.pv.fs_base, gsb, gss);
     }
-#endif
+
     if ( !fs_gs_done )
     {
         load_LDT(n);
@@ -1581,13 +1588,19 @@ static void load_segments(struct vcpu *n)
 
     if ( !fs_gs_done && !compat )
     {
-        write_fs_base(n->arch.pv.fs_base);
-        write_gs_shadow(n->arch.pv.gs_base_kernel);
-        write_gs_base(n->arch.pv.gs_base_user);
-
-        /* If in kernel mode then switch the GS bases around. */
-        if ( (n->arch.flags & TF_kernel_mode) )
+        if ( read_cr4() & X86_CR4_FSGSBASE )
+        {
+            __wrgsbase(gss);
+            __wrfsbase(n->arch.pv.fs_base);
             asm volatile ( "swapgs" );
+            __wrgsbase(gsb);
+        }
+        else
+        {
+            wrmsrl(MSR_FS_BASE, n->arch.pv.fs_base);
+            wrmsrl(MSR_GS_BASE, gsb);
+            wrmsrl(MSR_SHADOW_GS_BASE, gss);
+        }
     }
 
     if ( unlikely(!all_segs_okay) )
@@ -1703,16 +1716,31 @@ static void save_segments(struct vcpu *v)
 {
     struct cpu_user_regs *regs = &v->arch.user_regs;
 
-    regs->ds = read_sreg(ds);
-    regs->es = read_sreg(es);
-    regs->fs = read_sreg(fs);
-    regs->gs = read_sreg(gs);
+    asm volatile ( "mov %%ds, %[ds];\n\t"
+                   "mov %%es, %[es];\n\t"
+                   "mov %%fs, %[fs];\n\t"
+                   "mov %%gs, %[gs];\n\t"
+                   : [ds] "=m" (regs->ds),
+                     [es] "=m" (regs->es),
+                     [fs] "=m" (regs->fs),
+                     [gs] "=m" (regs->gs) );
 
     if ( !is_pv_32bit_vcpu(v) )
     {
-        unsigned long gs_base = read_gs_base();
+        unsigned long fs_base, gs_base;
+
+        if ( read_cr4() & X86_CR4_FSGSBASE )
+        {
+            fs_base = __rdfsbase();
+            gs_base = __rdgsbase();
+        }
+        else
+        {
+            rdmsrl(MSR_FS_BASE, fs_base);
+            rdmsrl(MSR_GS_BASE, gs_base);
+        }
 
-        v->arch.pv.fs_base = read_fs_base();
+        v->arch.pv.fs_base = fs_base;
         if ( v->arch.flags & TF_kernel_mode )
             v->arch.pv.gs_base_kernel = gs_base;
         else
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/5] x86/pv: Simplify emulation for the 64bit base MSRs
  2020-09-09  9:59 [PATCH 0/5] x86/pv: Minor perf improvements in segment handling Andrew Cooper
                   ` (3 preceding siblings ...)
  2020-09-09  9:59 ` [PATCH 4/5] x86/pv: Optimise to the segment context switching paths Andrew Cooper
@ 2020-09-09  9:59 ` Andrew Cooper
  2020-09-11 10:01   ` Jan Beulich
  4 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2020-09-09  9:59 UTC (permalink / raw)
  To: Xen-devel; +Cc: Andrew Cooper, Jan Beulich, Roger Pau Monné, Wei Liu

is_pv_32bit_domain() is an expensive predicate, but isn't used for speculative
safety in this case.  Swap to checking the Long Mode bit in the CPUID policy,
which is the architecturally correct behaviour.

is_canonical_address() isn't a trivial predicate, but it will become more
complicated when 5-level support is added.  Rearrange write_msr() to collapse
the common checks.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>

For reference, the diff is:

  add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-152 (-152)
  Function                                     old     new   delta
  read_msr                                    1075    1030     -45
  write_msr                                   1537    1430    -107

but this isn't the point of the change.
---
 xen/arch/x86/pv/emul-priv-op.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 9dd1d59423..0fd95fe9fa 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -848,6 +848,7 @@ static int read_msr(unsigned int reg, uint64_t *val,
 {
     struct vcpu *curr = current;
     const struct domain *currd = curr->domain;
+    const struct cpuid_policy *cp = currd->arch.cpuid;
     bool vpmu_msr = false;
     int ret;
 
@@ -869,19 +870,19 @@ static int read_msr(unsigned int reg, uint64_t *val,
         return X86EMUL_OKAY;
 
     case MSR_FS_BASE:
-        if ( is_pv_32bit_domain(currd) )
+        if ( !cp->extd.lm )
             break;
         *val = read_fs_base();
         return X86EMUL_OKAY;
 
     case MSR_GS_BASE:
-        if ( is_pv_32bit_domain(currd) )
+        if ( !cp->extd.lm )
             break;
         *val = read_gs_base();
         return X86EMUL_OKAY;
 
     case MSR_SHADOW_GS_BASE:
-        if ( is_pv_32bit_domain(currd) )
+        if ( !cp->extd.lm )
             break;
         *val = curr->arch.pv.gs_base_user;
         return X86EMUL_OKAY;
@@ -975,6 +976,7 @@ static int write_msr(unsigned int reg, uint64_t val,
 {
     struct vcpu *curr = current;
     const struct domain *currd = curr->domain;
+    const struct cpuid_policy *cp = currd->arch.cpuid;
     bool vpmu_msr = false;
     int ret;
 
@@ -991,22 +993,22 @@ static int write_msr(unsigned int reg, uint64_t val,
         uint64_t temp;
 
     case MSR_FS_BASE:
-        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
-            break;
-        write_fs_base(val);
-        return X86EMUL_OKAY;
-
     case MSR_GS_BASE:
-        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
-            break;
-        write_gs_base(val);
-        return X86EMUL_OKAY;
-
     case MSR_SHADOW_GS_BASE:
-        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
+        if ( !cp->extd.lm || !is_canonical_address(val) )
             break;
-        write_gs_shadow(val);
-        curr->arch.pv.gs_base_user = val;
+
+        if ( reg == MSR_FS_BASE )
+            write_fs_base(val);
+        else if ( reg == MSR_GS_BASE )
+            write_gs_base(val);
+        else if ( reg == MSR_SHADOW_GS_BASE )
+        {
+            write_gs_shadow(val);
+            curr->arch.pv.gs_base_user = val;
+        }
+        else
+            ASSERT_UNREACHABLE();
         return X86EMUL_OKAY;
 
     case MSR_EFER:
-- 
2.11.0



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/5] x86/asm: Rename FS/GS base helpers
  2020-09-09  9:59 ` [PATCH 1/5] x86/asm: Rename FS/GS base helpers Andrew Cooper
@ 2020-09-10 14:45   ` Jan Beulich
  2020-09-15  2:50   ` Tian, Kevin
  1 sibling, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2020-09-10 14:45 UTC (permalink / raw)
  To: Andrew Cooper
  Cc: Xen-devel, Roger Pau Monné, Wei Liu, Jun Nakajima, Kevin Tian

On 09.09.2020 11:59, Andrew Cooper wrote:
> They are currently named after the FSGSBASE instructions, but are not thin
> wrappers around said instructions, and therefore do not accurately reflect the
> logic they perform, especially when it comes to functioning safely in non
> FSGSBASE context.
> 
> Rename them to {read,write}_{fs,gs}_base() to avoid confusion.

Well, I didn't think the names were confusing, but since you look
to think otherwise ...

> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Acked-by: Jan Beulich <jbeulich@suse.com>

Jan


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/5] x86/asm: Split __wr{fs,gs}base() out of write_{fs,gs}_base()
  2020-09-09  9:59 ` [PATCH 2/5] x86/asm: Split __wr{fs, gs}base() out of write_{fs, gs}_base() Andrew Cooper
@ 2020-09-10 14:47   ` Jan Beulich
  2020-09-14 14:34     ` Andrew Cooper
  0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2020-09-10 14:47 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 09.09.2020 11:59, Andrew Cooper wrote:
> To match the read side which is already split out.  A future change will want
> to use them.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Acked-by: Jan Beulich <jbeulich@suse.com>
Of course ...

> --- a/xen/include/asm-x86/msr.h
> +++ b/xen/include/asm-x86/msr.h
> @@ -156,6 +156,24 @@ static inline unsigned long __rdgsbase(void)
>      return base;
>  }
>  
> +static inline void __wrfsbase(unsigned long base)
> +{
> +#ifdef HAVE_AS_FSGSBASE
> +    asm volatile ( "wrfsbase %0" :: "r" (base) );
> +#else
> +    asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd0" :: "a" (base) );
> +#endif
> +}
> +
> +static inline void __wrgsbase(unsigned long base)
> +{
> +#ifdef HAVE_AS_FSGSBASE
> +    asm volatile ( "wrgsbase %0" :: "r" (base) );
> +#else
> +    asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8" :: "a" (base) );
> +#endif
> +}

... I'd have preferred if you had used just a single leading
underscore, despite realizing this would introduce an inconsistency
with the read sides.

Jan


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/5] x86/pv: Optimise prefetching in svm_load_segs()
  2020-09-09  9:59 ` [PATCH 3/5] x86/pv: Optimise prefetching in svm_load_segs() Andrew Cooper
@ 2020-09-10 14:57   ` Jan Beulich
  2020-09-10 20:30     ` Andrew Cooper
  0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2020-09-10 14:57 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 09.09.2020 11:59, Andrew Cooper wrote:
> Split into two functions.  Passing a load of zeros in results in somewhat poor
> register scheduling in __context_switch().

I'm afraid I don't understand why this would be, no matter that
I trust you having observed this being the case: The registers
used for passing parameters are all call-clobbered anyway, so
the compiler can't use them for anything across the call. And
it would look pretty poor code generation wise if the XORs to
clear them (which effectively have no latency at all) would be
scheduled far ahead of the call, especially when there's better
use for the registers. The observation wasn't possibly from
before your recent dropping of two of the parameters, when they
couldn't all be passed in registers (albeit even then it would
be odd, as the change then should merely have lead to a slightly
smaller stack frame of the function)?

> Update the prefetching comment to note that the main point is the TLB fill.
> 
> Reorder the writes in svm_load_segs() to access the VMCB fields in ascending
> order, which gets better next-line prefetch behaviour out of hardware.  Update
> the prefetch instruction to match.
> 
> The net delta is:
> 
>   add/remove: 1/0 grow/shrink: 0/2 up/down: 38/-39 (-1)
>   Function                                     old     new   delta
>   svm_load_segs_prefetch                         -      38     +38
>   __context_switch                             967     951     -16
>   svm_load_segs                                291     268     -23

A net win of 1 byte ;-)

> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -1520,6 +1520,19 @@ static void svm_init_erratum_383(const struct cpuinfo_x86 *c)
>  }
>  
>  #ifdef CONFIG_PV
> +void svm_load_segs_prefetch(void)
> +{
> +    struct vmcb_struct *vmcb = this_cpu(host_vmcb_va);

const?

> +    if ( vmcb )
> +        /*
> +         * The main reason for this prefetch is for the TLB fill.  Use the
> +         * opporunity to fetch the lowest address used, to get the best
> +         * behaviour out of hardwares next-line prefetcher.

Nit: "opportunity" and "hardware's" ?

I'm not opposed to the change, but before giving my R-b I'd like to
understand the register scheduling background a little better.

Jan


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/5] x86/pv: Optimise prefetching in svm_load_segs()
  2020-09-10 14:57   ` Jan Beulich
@ 2020-09-10 20:30     ` Andrew Cooper
  2020-09-11  6:31       ` Jan Beulich
  0 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2020-09-10 20:30 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 10/09/2020 15:57, Jan Beulich wrote:
> On 09.09.2020 11:59, Andrew Cooper wrote:
>> Split into two functions.  Passing a load of zeros in results in somewhat poor
>> register scheduling in __context_switch().
> I'm afraid I don't understand why this would be, no matter that
> I trust you having observed this being the case: The registers
> used for passing parameters are all call-clobbered anyway, so
> the compiler can't use them for anything across the call. And
> it would look pretty poor code generation wise if the XORs to
> clear them (which effectively have no latency at all) would be
> scheduled far ahead of the call, especially when there's better
> use for the registers. The observation wasn't possibly from
> before your recent dropping of two of the parameters, when they
> couldn't all be passed in registers (albeit even then it would
> be odd, as the change then should merely have lead to a slightly
> smaller stack frame of the function)?

Hmm yes.  I wrote this patch before I did the assertion fix, and the
comment didn't rebase very well.

Back then, one of the zeros was on the stack, which was definitely an
unwanted property.  Even though the XORs are mostly free, they're not
totally free, as they cost decode bandwidth and instruction cache space
(Trivial amounts, but still...).

In general, LTO's inter-procedural-analysis can figure out that
svm_load_segs_prefetch() doesn't use many registers, and the caller can
be optimised based on the fact that some registers aren't actually
clobbered.  (Then again, in this case with a sole caller, LTO really
ought to be able to inline and delete the function.)

How about "results in unnecessary caller setup code" ?

~Andrew


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 3/5] x86/pv: Optimise prefetching in svm_load_segs()
  2020-09-10 20:30     ` Andrew Cooper
@ 2020-09-11  6:31       ` Jan Beulich
  0 siblings, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2020-09-11  6:31 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 10.09.2020 22:30, Andrew Cooper wrote:
> On 10/09/2020 15:57, Jan Beulich wrote:
>> On 09.09.2020 11:59, Andrew Cooper wrote:
>>> Split into two functions.  Passing a load of zeros in results in somewhat poor
>>> register scheduling in __context_switch().
>> I'm afraid I don't understand why this would be, no matter that
>> I trust you having observed this being the case: The registers
>> used for passing parameters are all call-clobbered anyway, so
>> the compiler can't use them for anything across the call. And
>> it would look pretty poor code generation wise if the XORs to
>> clear them (which effectively have no latency at all) would be
>> scheduled far ahead of the call, especially when there's better
>> use for the registers. The observation wasn't possibly from
>> before your recent dropping of two of the parameters, when they
>> couldn't all be passed in registers (albeit even then it would
>> be odd, as the change then should merely have lead to a slightly
>> smaller stack frame of the function)?
> 
> Hmm yes.  I wrote this patch before I did the assertion fix, and the
> comment didn't rebase very well.
> 
> Back then, one of the zeros was on the stack, which was definitely an
> unwanted property.  Even though the XORs are mostly free, they're not
> totally free, as they cost decode bandwidth and instruction cache space
> (Trivial amounts, but still...).
> 
> In general, LTO's inter-procedural-analysis can figure out that
> svm_load_segs_prefetch() doesn't use many registers, and the caller can
> be optimised based on the fact that some registers aren't actually
> clobbered.  (Then again, in this case with a sole caller, LTO really
> ought to be able to inline and delete the function.)
> 
> How about "results in unnecessary caller setup code" ?

Yeah, that's probably better as a description.

Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] x86/pv: Optimise to the segment context switching paths
  2020-09-09  9:59 ` [PATCH 4/5] x86/pv: Optimise to the segment context switching paths Andrew Cooper
@ 2020-09-11  9:49   ` Jan Beulich
  2020-09-11 12:53     ` Andrew Cooper
  0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2020-09-11  9:49 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 09.09.2020 11:59, Andrew Cooper wrote:
> Save the segment selectors with explicit asm, rather than with read_sreg().
> This permits the use of MOV's m16 encoding, which avoids indirecting the
> selector value through a register.

Instead of this, how about making read_sreg() look like

#define read_sreg(val, name) ({                                  \
    if ( sizeof(val) == 2 )                                      \
        asm volatile ( "mov %%" STR(name) ",%0" : "=m" (val) );  \
    else                                                         \
        asm volatile ( "mov %%" STR(name) ",%k0" : "=r" (val) ); \
})

which then also covers read_registers()? I have a full patch
ready to send, if you agree.
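
Call sites would then end up along the lines of (just a sketch, local
variable name made up)

    read_sreg(regs->fs, fs);  /* 2-byte field: "=m" form, MOV straight to memory */

    unsigned int sel;
    read_sreg(sel, gs);       /* wider local: "=r" form, %k0 keeps it a 32-bit MOV */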

> For {save,load}_segments(), opencode the fs/gs helpers, as the optimiser is
> unable to rearrange the logic down to a single X86_CR4_FSGSBASE check.  This
> removes several jumps and creates bigger basic blocks.
> 
> In load_segments(), optimise GS base handling substantially.  The call to
> svm_load_segs() already needs gsb/gss the correct way around, so hoist the
> logic for the later path to use it as well.  Swapping the inputs in GPRs is
> far more efficient than using SWAPGS.
> 
> Previously, there was optionally one SWAPGS from the user/kernel mode check,
> two SWAPGS's in write_gs_shadow() and two WRGSBASE's.  Updates to GS (4 or 5
> here) in quick succession stall all contemporary pipelines repeatedly.  (Intel
> Core/Xeon pipelines have segment register renaming[1], so can continue to
> speculatively execute with one GS update in flight.  Other pipelines cannot
> have two updates in flight concurrently, and must stall dispatch of the second
> until the first has retired.)
> 
> Rewrite the logic to have exactly two WRGSBASEs and one SWAPGS, which removes
> two stalls on all contemporary processors.
> 
> Although modest, the resulting delta is:
> 
>   add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-106 (-106)
>   Function                                     old     new   delta
>   paravirt_ctxt_switch_from                    235     198     -37
>   context_switch                              3582    3513     -69
> 
> in a common path.
> 
> [1] https://software.intel.com/security-software-guidance/insights/deep-dive-intel-analysis-speculative-behavior-swapgs-and-segment-registers
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Preferably re-based onto said change, and maybe with another adjustment
(see below)
Reviewed-by: Jan Beulich <jbeulich@suse.com>

> @@ -1556,18 +1557,24 @@ static void load_segments(struct vcpu *n)
>                     : [ok] "+r" (all_segs_okay)          \
>                     : [_val] "rm" (val) )
>  
> -#ifdef CONFIG_HVM
> -    if ( cpu_has_svm && !compat && (uregs->fs | uregs->gs) <= 3 )
> +    if ( !compat )
>      {
> -        unsigned long gsb = n->arch.flags & TF_kernel_mode
> -            ? n->arch.pv.gs_base_kernel : n->arch.pv.gs_base_user;
> -        unsigned long gss = n->arch.flags & TF_kernel_mode
> -            ? n->arch.pv.gs_base_user : n->arch.pv.gs_base_kernel;
> +        gsb = n->arch.pv.gs_base_kernel;
> +        gss = n->arch.pv.gs_base_user;
> +
> +        /*
> +         * Figure out which way around gsb/gss want to be.  gsb needs to be
> +         * the active context, and gss needs to be the inactive context.
> +         */
> +        if ( !(n->arch.flags & TF_kernel_mode) )
> +            SWAP(gsb, gss);
>  
> -        fs_gs_done = svm_load_segs(n->arch.pv.ldt_ents, LDT_VIRT_START(n),
> -                                   n->arch.pv.fs_base, gsb, gss);
> +        if ( IS_ENABLED(CONFIG_HVM) && cpu_has_svm &&

The change from #ifdef to IS_ENABLED() wants mirroring to the
prefetching site imo. I wonder though whether the adjustment is a
good idea: The declaration lives in svm.h, and I would view it as
quite reasonable for that header to not get included in !HVM builds
(there may be a lot of disentangling to do to get there, but still).

Jan


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86/pv: Simplify emulation for the 64bit base MSRs
  2020-09-09  9:59 ` [PATCH 5/5] x86/pv: Simplify emulation for the 64bit base MSRs Andrew Cooper
@ 2020-09-11 10:01   ` Jan Beulich
  2020-09-11 16:10     ` Andrew Cooper
  0 siblings, 1 reply; 18+ messages in thread
From: Jan Beulich @ 2020-09-11 10:01 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 09.09.2020 11:59, Andrew Cooper wrote:
> is_pv_32bit_domain() is an expensive predicate, but isn't used for speculative
> safety in this case.  Swap to checking the Long Mode bit in the CPUID policy,
> which is the architecturally correct behaviour.
> 
> is_canonical_address() isn't a trivial predicate, but it will become more
> complicated when 5-level support is added.  Rearrange write_msr() to collapse
> the common checks.

Did you mean "is" instead of "isn't" or "and" instead of "but"? The
way it is it doesn't look very logical to me.

> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
with one more remark:

> @@ -991,22 +993,22 @@ static int write_msr(unsigned int reg, uint64_t val,
>          uint64_t temp;
>  
>      case MSR_FS_BASE:
> -        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
> -            break;
> -        write_fs_base(val);
> -        return X86EMUL_OKAY;
> -
>      case MSR_GS_BASE:
> -        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
> -            break;
> -        write_gs_base(val);
> -        return X86EMUL_OKAY;
> -
>      case MSR_SHADOW_GS_BASE:
> -        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
> +        if ( !cp->extd.lm || !is_canonical_address(val) )
>              break;
> -        write_gs_shadow(val);
> -        curr->arch.pv.gs_base_user = val;
> +
> +        if ( reg == MSR_FS_BASE )
> +            write_fs_base(val);
> +        else if ( reg == MSR_GS_BASE )
> +            write_gs_base(val);
> +        else if ( reg == MSR_SHADOW_GS_BASE )

With the three case labels just above, I think this "else if" and ...

> +        {
> +            write_gs_shadow(val);
> +            curr->arch.pv.gs_base_user = val;
> +        }
> +        else
> +            ASSERT_UNREACHABLE();

... this assertion are at least close to being superfluous. Their
dropping would then also make me less inclined to ask for an
inner switch().
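
(For reference, the inner switch() would amount to no more than

         switch ( reg )
         {
         case MSR_FS_BASE:
             write_fs_base(val);
             break;
         case MSR_GS_BASE:
             write_gs_base(val);
             break;
         case MSR_SHADOW_GS_BASE:
             write_gs_shadow(val);
             curr->arch.pv.gs_base_user = val;
             break;
         }
         return X86EMUL_OKAY;

 i.e. merely a reshaping of the if/else chain above.)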

Jan


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] x86/pv: Optimise to the segment context switching paths
  2020-09-11  9:49   ` Jan Beulich
@ 2020-09-11 12:53     ` Andrew Cooper
  2020-09-11 13:28       ` Jan Beulich
  0 siblings, 1 reply; 18+ messages in thread
From: Andrew Cooper @ 2020-09-11 12:53 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 11/09/2020 10:49, Jan Beulich wrote:
> On 09.09.2020 11:59, Andrew Cooper wrote:
>> Save the segment selectors with explicit asm, rather than with read_sreg().
>> This permits the use of MOV's m16 encoding, which avoids indirecting the
>> selector value through a register.
> Instead of this, how about making read_sreg() look like
>
> #define read_sreg(val, name) ({                                  \
>     if ( sizeof(val) == 2 )                                      \
>         asm volatile ( "mov %%" STR(name) ",%0" : "=m" (val) );  \
>     else                                                         \
>         asm volatile ( "mov %%" STR(name) ",%k0" : "=r" (val) ); \
> })
>
> which then also covers read_registers()? I have a full patch
> ready to send, if you agree.

That will go wrong for

uint16_t ds; read_sreg(ds, ds);

as it will force the variable to be spilled onto the stack.

I don't think this is a clever move.


Furthermore, it is bad enough that read_sreg() already takes one magic
parameter which doesn't follow normal C rules - renaming to READ_SREG()
would be an improvement - but this is now adding a second which is a
capture by name.

I'm afraid that is a firm no from me.


There is one other place where we read all segment registers at once. 
Maybe having a static inline save_sregs(struct cpu_user_regs *) hiding
the asm block, but I'm not necessarily convinced of this plan either. 
At least it would cleanly separate the "I've obviously got a memory
operand" and "I almost certainly want it in a register anyway" logic.

>
>> For {save,load}_segments(), opencode the fs/gs helpers, as the optimiser is
>> unable to rearrange the logic down to a single X86_CR4_FSGSBASE check.  This
>> removes several jumps and creates bigger basic blocks.
>>
>> In load_segments(), optimise GS base handling substantially.  The call to
>> svm_load_segs() already needs gsb/gss the correct way around, so hoist the
>> logic for the later path to use it as well.  Swapping the inputs in GPRs is
>> far more efficient than using SWAPGS.
>>
>> Previously, there was optionally one SWAPGS from the user/kernel mode check,
>> two SWAPGS's in write_gs_shadow() and two WRGSBASE's.  Updates to GS (4 or 5
>> here) in quick succession stall all contemporary pipelines repeatedly.  (Intel
>> Core/Xeon pipelines have segment register renaming[1], so can continue to
>> speculatively execute with one GS update in flight.  Other pipelines cannot
>> have two updates in flight concurrently, and must stall dispatch of the second
>> until the first has retired.)
>>
>> Rewrite the logic to have exactly two WRGSBASEs and one SWAPGS, which removes
>> two stalls on all contemporary processors.
>>
>> Although modest, the resulting delta is:
>>
>>   add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-106 (-106)
>>   Function                                     old     new   delta
>>   paravirt_ctxt_switch_from                    235     198     -37
>>   context_switch                              3582    3513     -69
>>
>> in a common path.
>>
>> [1] https://software.intel.com/security-software-guidance/insights/deep-dive-intel-analysis-speculative-behavior-swapgs-and-segment-registers
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Preferably re-based onto said change, and maybe with another adjustment
> (see below)
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
>> @@ -1556,18 +1557,24 @@ static void load_segments(struct vcpu *n)
>>                     : [ok] "+r" (all_segs_okay)          \
>>                     : [_val] "rm" (val) )
>>  
>> -#ifdef CONFIG_HVM
>> -    if ( cpu_has_svm && !compat && (uregs->fs | uregs->gs) <= 3 )
>> +    if ( !compat )
>>      {
>> -        unsigned long gsb = n->arch.flags & TF_kernel_mode
>> -            ? n->arch.pv.gs_base_kernel : n->arch.pv.gs_base_user;
>> -        unsigned long gss = n->arch.flags & TF_kernel_mode
>> -            ? n->arch.pv.gs_base_user : n->arch.pv.gs_base_kernel;
>> +        gsb = n->arch.pv.gs_base_kernel;
>> +        gss = n->arch.pv.gs_base_user;
>> +
>> +        /*
>> +         * Figure out which way around gsb/gss want to be.  gsb needs to be
>> +         * the active context, and gss needs to be the inactive context.
>> +         */
>> +        if ( !(n->arch.flags & TF_kernel_mode) )
>> +            SWAP(gsb, gss);
>>  
>> -        fs_gs_done = svm_load_segs(n->arch.pv.ldt_ents, LDT_VIRT_START(n),
>> -                                   n->arch.pv.fs_base, gsb, gss);
>> +        if ( IS_ENABLED(CONFIG_HVM) && cpu_has_svm &&
> The change from #ifdef to IS_ENABLED() wants mirroring to the
> prefetching site imo. I wonder though whether the adjustment is a
> good idea: The declaration lives in svm.h, and I would view it as
> quite reasonable for that header to not get included in !HVM builds
> (there may be a lot of disentangling to do to get there, but still).

I'm not overly fussed, but there will absolutely have to be HVM stubs
for normal code.  I don't see why we should special case svm_load_segs()
to not have a stub.

~Andrew


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/5] x86/pv: Optimise to the segment context switching paths
  2020-09-11 12:53     ` Andrew Cooper
@ 2020-09-11 13:28       ` Jan Beulich
  0 siblings, 0 replies; 18+ messages in thread
From: Jan Beulich @ 2020-09-11 13:28 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 11.09.2020 14:53, Andrew Cooper wrote:
> On 11/09/2020 10:49, Jan Beulich wrote:
>> On 09.09.2020 11:59, Andrew Cooper wrote:
>>> Save the segment selectors with explicit asm, rather than with read_sreg().
>>> This permits the use of MOV's m16 encoding, which avoids indirecting the
>>> selector value through a register.
>> Instead of this, how about making read_sreg() look like
>>
>> #define read_sreg(val, name) ({                                  \
>>     if ( sizeof(val) == 2 )                                      \
>>         asm volatile ( "mov %%" STR(name) ",%0" : "=m" (val) );  \
>>     else                                                         \
>>         asm volatile ( "mov %%" STR(name) ",%k0" : "=r" (val) ); \
>> })
>>
>> which then also covers read_registers()? I have a full patch
>> ready to send, if you agree.
> 
> That will go wrong for
> 
> uint16_t ds; read_sreg(ds, ds);
> 
> as it will force the variable to be spilled onto the stack.


Let me quote the main part of the description of the patch then:

"Under the assumption that standalone variables preferably wouldn't be
 of uint16_t (or unsigned short) type, build in a heuristic to do a
 store to memory when the output expression is two bytes wide. In the
 register variant, add a 'k' modifier to avoid assemblers possibly
 generating operand size or REX prefixes."

A local variable has no reason to be uint16_t; nowadays even
./CODING_STYLE says so.

> I don't think this is a clever move.
> 
> 
> Furthermore, it is bad enough that read_sreg() already takes one magic
> parameter which doesn't follow normal C rules - renaming to READ_SREG()
> would be an improvement - but this is now adding a second which is a
> capture by name.

I was expecting this to be a possible objection from you. I wouldn't
mind upper-casing the name, but ...

> I'm afraid that is a firm no from me.

... looks like you prefer the open-coding, while I'd like to avoid it
whenever reasonably possible.

> There is one other place where we read all segment registers at once. 
> Maybe having a static inline save_sregs(struct cpu_user_regs *) hiding
> the asm block, but I'm not necessarily convinced of this plan either. 
> At least it would cleanly separate the "I've obviously got a memory
> operand" and "I almost certainly want it in a register anyway" logic.

I could live with this as a compromise.

>>> @@ -1556,18 +1557,24 @@ static void load_segments(struct vcpu *n)
>>>                     : [ok] "+r" (all_segs_okay)          \
>>>                     : [_val] "rm" (val) )
>>>  
>>> -#ifdef CONFIG_HVM
>>> -    if ( cpu_has_svm && !compat && (uregs->fs | uregs->gs) <= 3 )
>>> +    if ( !compat )
>>>      {
>>> -        unsigned long gsb = n->arch.flags & TF_kernel_mode
>>> -            ? n->arch.pv.gs_base_kernel : n->arch.pv.gs_base_user;
>>> -        unsigned long gss = n->arch.flags & TF_kernel_mode
>>> -            ? n->arch.pv.gs_base_user : n->arch.pv.gs_base_kernel;
>>> +        gsb = n->arch.pv.gs_base_kernel;
>>> +        gss = n->arch.pv.gs_base_user;
>>> +
>>> +        /*
>>> +         * Figure out which way around gsb/gss want to be.  gsb needs to be
>>> +         * the active context, and gss needs to be the inactive context.
>>> +         */
>>> +        if ( !(n->arch.flags & TF_kernel_mode) )
>>> +            SWAP(gsb, gss);
>>>  
>>> -        fs_gs_done = svm_load_segs(n->arch.pv.ldt_ents, LDT_VIRT_START(n),
>>> -                                   n->arch.pv.fs_base, gsb, gss);
>>> +        if ( IS_ENABLED(CONFIG_HVM) && cpu_has_svm &&
>> The change from #ifdef to IS_ENABLED() wants mirroring to the
>> prefetching site imo. I wonder though whether the adjustment is a
>> good idea: The declaration lives in svm.h, and I would view it as
>> quite reasonable for that header to not get included in !HVM builds
>> (there may be a lot of disentangling to do to get there, but still).
> 
> I'm not overly fussed, but there will absolutely have to be HVM stubs
> for normal code.  I don't see why we should special case svm_load_segs()
> to not have a stub.

I don't see why they "absolutely" have to exist. With our reliance on DCE
I don't think there need to be stubs consistently for _every_ HVM
function used from more generic code. I also don't view this as "special
casing" - there are already various functions having no stubs, but
merely declarations. The special thing here is that, by their placement,
these declarations may not be in scope long term, which is because we're
deliberately breaking proper layering here by using an HVM feature for PV.
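
To make the DCE point concrete, a stand-alone toy example (all names
invented, nothing to do with Xen's build system): assuming the compiler
folds the constant condition, the !CONFIG_FEATURE build links even though
no stub body for feature_load() exists anywhere.

#include <stdbool.h>
#include <stdio.h>

#ifdef CONFIG_FEATURE
#define feature_enabled() 1
bool feature_load(void) { return true; }
#else
#define feature_enabled() 0
bool feature_load(void);   /* declaration only - no stub body anywhere */
#endif

int main(void)
{
    bool done = false;

    /* With feature_enabled() expanding to the constant 0, this call is
     * unreachable; the compiler drops it, so the missing body never
     * reaches the linker. */
    if ( feature_enabled() )
        done = feature_load();

    printf("done=%d\n", done);
    return 0;
}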

Jan


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 5/5] x86/pv: Simplify emulation for the 64bit base MSRs
  2020-09-11 10:01   ` Jan Beulich
@ 2020-09-11 16:10     ` Andrew Cooper
  0 siblings, 0 replies; 18+ messages in thread
From: Andrew Cooper @ 2020-09-11 16:10 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 11/09/2020 11:01, Jan Beulich wrote:
> On 09.09.2020 11:59, Andrew Cooper wrote:
>> is_pv_32bit_domain() is an expensive predicate, but isn't used for speculative
>> safety in this case.  Swap to checking the Long Mode bit in the CPUID policy,
>> which is the architecturally correct behaviour.
>>
>> is_canonical_address() isn't a trivial predicate, but it will become more
>> complicated when 5-level support is added.  Rearrange write_msr() to collapse
>> the common checks.
> Did you mean "is" instead of "isn't" or "and" instead of "but"? The
> way it is, it doesn't look very logical to me.

I guess the meaning got lost somewhere.

is_canonical_address() is currently not completely trivial, but also not
massively complicated either.

It will become much more complicated with LA57.
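
For anyone following along, a self-contained illustration (not necessarily
Xen's exact implementation) of why: with 4-level paging the check is a
single sign-extension test on bit 47, while LA57 moves the boundary to
bit 56, and which of the two applies becomes a run-time property.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* 4-level paging: bits 63:48 must be copies of bit 47. */
static bool canonical_la48(uint64_t addr)
{
    return ((int64_t)addr >> 47) == ((int64_t)addr >> 63);
}

/* 5-level paging (LA57): bits 63:57 must be copies of bit 56. */
static bool canonical_la57(uint64_t addr)
{
    return ((int64_t)addr >> 56) == ((int64_t)addr >> 63);
}

int main(void)
{
    assert(canonical_la48(0x00007fffffffffffULL));
    assert(!canonical_la48(0x0000800000000000ULL));  /* non-canonical today */
    assert(canonical_la57(0x0000800000000000ULL));   /* ... but fine under LA57 */
    assert(canonical_la57(0xff00000000000000ULL));
    return 0;
}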

>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> with one more remark:
>
>> @@ -991,22 +993,22 @@ static int write_msr(unsigned int reg, uint64_t val,
>>          uint64_t temp;
>>  
>>      case MSR_FS_BASE:
>> -        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
>> -            break;
>> -        write_fs_base(val);
>> -        return X86EMUL_OKAY;
>> -
>>      case MSR_GS_BASE:
>> -        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
>> -            break;
>> -        write_gs_base(val);
>> -        return X86EMUL_OKAY;
>> -
>>      case MSR_SHADOW_GS_BASE:
>> -        if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
>> +        if ( !cp->extd.lm || !is_canonical_address(val) )
>>              break;
>> -        write_gs_shadow(val);
>> -        curr->arch.pv.gs_base_user = val;
>> +
>> +        if ( reg == MSR_FS_BASE )
>> +            write_fs_base(val);
>> +        else if ( reg == MSR_GS_BASE )
>> +            write_gs_base(val);
>> +        else if ( reg == MSR_SHADOW_GS_BASE )
> With the three case labels just above, I think this "else if" and ...
>
>> +        {
>> +            write_gs_shadow(val);
>> +            curr->arch.pv.gs_base_user = val;
>> +        }
>> +        else
>> +            ASSERT_UNREACHABLE();
> ... this assertion are at least close to being superfluous. Their
> dropping would then also make me less inclined to ask for an
> inner switch().

I'm not overly fussed, as this example is fairly trivial, but I was
attempting to go for something which ends up safe even in the case of a
bad edit to the outer switch statement.

I'd expect the compiler to drop both of the aspects you talk about.

~Andrew


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/5] x86/asm: Split __wr{fs,gs}base() out of write_{fs,gs}_base()
  2020-09-10 14:47   ` [PATCH 2/5] x86/asm: Split __wr{fs,gs}base() out of write_{fs,gs}_base() Jan Beulich
@ 2020-09-14 14:34     ` Andrew Cooper
  0 siblings, 0 replies; 18+ messages in thread
From: Andrew Cooper @ 2020-09-14 14:34 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Xen-devel, Roger Pau Monné, Wei Liu

On 10/09/2020 15:47, Jan Beulich wrote:
> On 09.09.2020 11:59, Andrew Cooper wrote:
>> To match the read side which is already split out.  A future change will want
>> to use them.
>>
>> No functional change.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Acked-by: Jan Beulich <jbeulich@suse.com>
> Of course ...
>
>> --- a/xen/include/asm-x86/msr.h
>> +++ b/xen/include/asm-x86/msr.h
>> @@ -156,6 +156,24 @@ static inline unsigned long __rdgsbase(void)
>>      return base;
>>  }
>>  
>> +static inline void __wrfsbase(unsigned long base)
>> +{
>> +#ifdef HAVE_AS_FSGSBASE
>> +    asm volatile ( "wrfsbase %0" :: "r" (base) );
>> +#else
>> +    asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd0" :: "a" (base) );
>> +#endif
>> +}
>> +
>> +static inline void __wrgsbase(unsigned long base)
>> +{
>> +#ifdef HAVE_AS_FSGSBASE
>> +    asm volatile ( "wrgsbase %0" :: "r" (base) );
>> +#else
>> +    asm volatile ( ".byte 0xf3, 0x48, 0x0f, 0xae, 0xd8" :: "a" (base) );
>> +#endif
>> +}
> ... I'd have preferred if you had used just a single leading
> underscore, despite realizing this would introduce an inconsistency
> with the read sides.

You're welcome to change them if you wish.

As always, I value consistency far far higher than arbitrary rules which
don't impact us in practice.

~Andrew


^ permalink raw reply	[flat|nested] 18+ messages in thread

* RE: [PATCH 1/5] x86/asm: Rename FS/GS base helpers
  2020-09-09  9:59 ` [PATCH 1/5] x86/asm: Rename FS/GS base helpers Andrew Cooper
  2020-09-10 14:45   ` Jan Beulich
@ 2020-09-15  2:50   ` Tian, Kevin
  1 sibling, 0 replies; 18+ messages in thread
From: Tian, Kevin @ 2020-09-15  2:50 UTC (permalink / raw)
  To: Andrew Cooper, Xen-devel
  Cc: Jan Beulich, Roger Pau Monné, Wei Liu, Nakajima, Jun

> From: Andrew Cooper <andrew.cooper3@citrix.com>
> Sent: Wednesday, September 9, 2020 5:59 PM
> 
> They are currently named after the FSGSBASE instructions, but are not thin
> wrappers around said instructions, and therefore do not accurately reflect
> the
> logic they perform, especially when it comes to functioning safely in non
> FSGSBASE context.
> 
> Rename them to {read,write}_{fs,gs}_base() to avoid confusion.
> 
> No functional change.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Kevin Tian <kevin.tian@intel.com>

> ---
> CC: Jan Beulich <JBeulich@suse.com>
> CC: Roger Pau Monné <roger.pau@citrix.com>
> CC: Wei Liu <wl@xen.org>
> CC: Jun Nakajima <jun.nakajima@intel.com>
> CC: Kevin Tian <kevin.tian@intel.com>
> ---
>  xen/arch/x86/domain.c          | 10 +++++-----
>  xen/arch/x86/hvm/vmx/vmx.c     |  8 ++++----
>  xen/arch/x86/pv/domain.c       |  2 +-
>  xen/arch/x86/pv/emul-priv-op.c | 14 +++++++-------
>  xen/arch/x86/x86_64/mm.c       |  8 ++++----
>  xen/arch/x86/x86_64/traps.c    |  6 +++---
>  xen/include/asm-x86/msr.h      | 12 ++++++------
>  7 files changed, 30 insertions(+), 30 deletions(-)
> 
> diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
> index e8e91cf080..2271bee36a 100644
> --- a/xen/arch/x86/domain.c
> +++ b/xen/arch/x86/domain.c
> @@ -1581,9 +1581,9 @@ static void load_segments(struct vcpu *n)
> 
>      if ( !fs_gs_done && !compat )
>      {
> -        wrfsbase(n->arch.pv.fs_base);
> -        wrgsshadow(n->arch.pv.gs_base_kernel);
> -        wrgsbase(n->arch.pv.gs_base_user);
> +        write_fs_base(n->arch.pv.fs_base);
> +        write_gs_shadow(n->arch.pv.gs_base_kernel);
> +        write_gs_base(n->arch.pv.gs_base_user);
> 
>          /* If in kernel mode then switch the GS bases around. */
>          if ( (n->arch.flags & TF_kernel_mode) )
> @@ -1710,9 +1710,9 @@ static void save_segments(struct vcpu *v)
> 
>      if ( !is_pv_32bit_vcpu(v) )
>      {
> -        unsigned long gs_base = rdgsbase();
> +        unsigned long gs_base = read_gs_base();
> 
> -        v->arch.pv.fs_base = rdfsbase();
> +        v->arch.pv.fs_base = read_fs_base();
>          if ( v->arch.flags & TF_kernel_mode )
>              v->arch.pv.gs_base_kernel = gs_base;
>          else
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index c4b40bf3cb..d26e102970 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -512,12 +512,12 @@ static void vmx_save_guest_msrs(struct vcpu *v)
>       * We cannot cache SHADOW_GS_BASE while the VCPU runs, as it can
>       * be updated at any time via SWAPGS, which we cannot trap.
>       */
> -    v->arch.hvm.vmx.shadow_gs = rdgsshadow();
> +    v->arch.hvm.vmx.shadow_gs = read_gs_shadow();
>  }
> 
>  static void vmx_restore_guest_msrs(struct vcpu *v)
>  {
> -    wrgsshadow(v->arch.hvm.vmx.shadow_gs);
> +    write_gs_shadow(v->arch.hvm.vmx.shadow_gs);
>      wrmsrl(MSR_STAR,           v->arch.hvm.vmx.star);
>      wrmsrl(MSR_LSTAR,          v->arch.hvm.vmx.lstar);
>      wrmsrl(MSR_SYSCALL_MASK,   v->arch.hvm.vmx.sfmask);
> @@ -2958,7 +2958,7 @@ static int vmx_msr_read_intercept(unsigned int
> msr, uint64_t *msr_content)
>          break;
> 
>      case MSR_SHADOW_GS_BASE:
> -        *msr_content = rdgsshadow();
> +        *msr_content = read_gs_shadow();
>          break;
> 
>      case MSR_STAR:
> @@ -3190,7 +3190,7 @@ static int vmx_msr_write_intercept(unsigned int
> msr, uint64_t msr_content)
>          else if ( msr == MSR_GS_BASE )
>              __vmwrite(GUEST_GS_BASE, msr_content);
>          else
> -            wrgsshadow(msr_content);
> +            write_gs_shadow(msr_content);
> 
>          break;
> 
> diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
> index 44e4ea2582..663e76c773 100644
> --- a/xen/arch/x86/pv/domain.c
> +++ b/xen/arch/x86/pv/domain.c
> @@ -452,7 +452,7 @@ void toggle_guest_mode(struct vcpu *v)
>       * Update the cached value of the GS base about to become inactive, as a
>       * subsequent context switch won't bother re-reading it.
>       */
> -    gs_base = rdgsbase();
> +    gs_base = read_gs_base();
>      if ( v->arch.flags & TF_kernel_mode )
>          v->arch.pv.gs_base_kernel = gs_base;
>      else
> diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-
> op.c
> index a192160f84..9dd1d59423 100644
> --- a/xen/arch/x86/pv/emul-priv-op.c
> +++ b/xen/arch/x86/pv/emul-priv-op.c
> @@ -511,10 +511,10 @@ static int read_segment(enum x86_segment seg,
>              reg->base = 0;
>              break;
>          case x86_seg_fs:
> -            reg->base = rdfsbase();
> +            reg->base = read_fs_base();
>              break;
>          case x86_seg_gs:
> -            reg->base = rdgsbase();
> +            reg->base = read_gs_base();
>              break;
>          }
> 
> @@ -871,13 +871,13 @@ static int read_msr(unsigned int reg, uint64_t *val,
>      case MSR_FS_BASE:
>          if ( is_pv_32bit_domain(currd) )
>              break;
> -        *val = rdfsbase();
> +        *val = read_fs_base();
>          return X86EMUL_OKAY;
> 
>      case MSR_GS_BASE:
>          if ( is_pv_32bit_domain(currd) )
>              break;
> -        *val = rdgsbase();
> +        *val = read_gs_base();
>          return X86EMUL_OKAY;
> 
>      case MSR_SHADOW_GS_BASE:
> @@ -993,19 +993,19 @@ static int write_msr(unsigned int reg, uint64_t val,
>      case MSR_FS_BASE:
>          if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
>              break;
> -        wrfsbase(val);
> +        write_fs_base(val);
>          return X86EMUL_OKAY;
> 
>      case MSR_GS_BASE:
>          if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
>              break;
> -        wrgsbase(val);
> +        write_gs_base(val);
>          return X86EMUL_OKAY;
> 
>      case MSR_SHADOW_GS_BASE:
>          if ( is_pv_32bit_domain(currd) || !is_canonical_address(val) )
>              break;
> -        wrgsshadow(val);
> +        write_gs_shadow(val);
>          curr->arch.pv.gs_base_user = val;
>          return X86EMUL_OKAY;
> 
> diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
> index b69cf2dc4f..0d11a9f500 100644
> --- a/xen/arch/x86/x86_64/mm.c
> +++ b/xen/arch/x86/x86_64/mm.c
> @@ -1030,7 +1030,7 @@ long do_set_segment_base(unsigned int which,
> unsigned long base)
>      {
>      case SEGBASE_FS:
>          if ( is_canonical_address(base) )
> -            wrfsbase(base);
> +            write_fs_base(base);
>          else
>              ret = -EINVAL;
>          break;
> @@ -1038,7 +1038,7 @@ long do_set_segment_base(unsigned int which,
> unsigned long base)
>      case SEGBASE_GS_USER:
>          if ( is_canonical_address(base) )
>          {
> -            wrgsshadow(base);
> +            write_gs_shadow(base);
>              v->arch.pv.gs_base_user = base;
>          }
>          else
> @@ -1047,7 +1047,7 @@ long do_set_segment_base(unsigned int which,
> unsigned long base)
> 
>      case SEGBASE_GS_KERNEL:
>          if ( is_canonical_address(base) )
> -            wrgsbase(base);
> +            write_gs_base(base);
>          else
>              ret = -EINVAL;
>          break;
> @@ -1096,7 +1096,7 @@ long do_set_segment_base(unsigned int which,
> unsigned long base)
>                         : [flat] "r" (FLAT_USER_DS32) );
> 
>          /* Update the cache of the inactive base, as read from the GDT/LDT. */
> -        v->arch.pv.gs_base_user = rdgsbase();
> +        v->arch.pv.gs_base_user = read_gs_base();
> 
>          asm volatile ( safe_swapgs );
>          break;
> diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
> index 93af0c5e87..4f262122b7 100644
> --- a/xen/arch/x86/x86_64/traps.c
> +++ b/xen/arch/x86/x86_64/traps.c
> @@ -47,9 +47,9 @@ static void read_registers(struct cpu_user_regs *regs,
> unsigned long crs[8])
>      regs->es = read_sreg(es);
>      regs->fs = read_sreg(fs);
>      regs->gs = read_sreg(gs);
> -    crs[5] = rdfsbase();
> -    crs[6] = rdgsbase();
> -    crs[7] = rdgsshadow();
> +    crs[5] = read_fs_base();
> +    crs[6] = read_gs_base();
> +    crs[7] = read_gs_shadow();
>  }
> 
>  static void _show_registers(
> diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
> index 5c44c79600..5e141ac5a5 100644
> --- a/xen/include/asm-x86/msr.h
> +++ b/xen/include/asm-x86/msr.h
> @@ -156,7 +156,7 @@ static inline unsigned long __rdgsbase(void)
>      return base;
>  }
> 
> -static inline unsigned long rdfsbase(void)
> +static inline unsigned long read_fs_base(void)
>  {
>      unsigned long base;
> 
> @@ -168,7 +168,7 @@ static inline unsigned long rdfsbase(void)
>      return base;
>  }
> 
> -static inline unsigned long rdgsbase(void)
> +static inline unsigned long read_gs_base(void)
>  {
>      unsigned long base;
> 
> @@ -180,7 +180,7 @@ static inline unsigned long rdgsbase(void)
>      return base;
>  }
> 
> -static inline unsigned long rdgsshadow(void)
> +static inline unsigned long read_gs_shadow(void)
>  {
>      unsigned long base;
> 
> @@ -196,7 +196,7 @@ static inline unsigned long rdgsshadow(void)
>      return base;
>  }
> 
> -static inline void wrfsbase(unsigned long base)
> +static inline void write_fs_base(unsigned long base)
>  {
>      if ( read_cr4() & X86_CR4_FSGSBASE )
>  #ifdef HAVE_AS_FSGSBASE
> @@ -208,7 +208,7 @@ static inline void wrfsbase(unsigned long base)
>          wrmsrl(MSR_FS_BASE, base);
>  }
> 
> -static inline void wrgsbase(unsigned long base)
> +static inline void write_gs_base(unsigned long base)
>  {
>      if ( read_cr4() & X86_CR4_FSGSBASE )
>  #ifdef HAVE_AS_FSGSBASE
> @@ -220,7 +220,7 @@ static inline void wrgsbase(unsigned long base)
>          wrmsrl(MSR_GS_BASE, base);
>  }
> 
> -static inline void wrgsshadow(unsigned long base)
> +static inline void write_gs_shadow(unsigned long base)
>  {
>      if ( read_cr4() & X86_CR4_FSGSBASE )
>      {
> --
> 2.11.0


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2020-09-15  2:51 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-09  9:59 [PATCH 0/5] x86/pv: Minor perf improvements in segment handling Andrew Cooper
2020-09-09  9:59 ` [PATCH 1/5] x86/asm: Rename FS/GS base helpers Andrew Cooper
2020-09-10 14:45   ` Jan Beulich
2020-09-15  2:50   ` Tian, Kevin
2020-09-09  9:59 ` [PATCH 2/5] x86/asm: Split __wr{fs, gs}base() out of write_{fs, gs}_base() Andrew Cooper
2020-09-10 14:47   ` [PATCH 2/5] x86/asm: Split __wr{fs,gs}base() out of write_{fs,gs}_base() Jan Beulich
2020-09-14 14:34     ` Andrew Cooper
2020-09-09  9:59 ` [PATCH 3/5] x86/pv: Optimise prefetching in svm_load_segs() Andrew Cooper
2020-09-10 14:57   ` Jan Beulich
2020-09-10 20:30     ` Andrew Cooper
2020-09-11  6:31       ` Jan Beulich
2020-09-09  9:59 ` [PATCH 4/5] x86/pv: Optimise to the segment context switching paths Andrew Cooper
2020-09-11  9:49   ` Jan Beulich
2020-09-11 12:53     ` Andrew Cooper
2020-09-11 13:28       ` Jan Beulich
2020-09-09  9:59 ` [PATCH 5/5] x86/pv: Simplify emulation for the 64bit base MSRs Andrew Cooper
2020-09-11 10:01   ` Jan Beulich
2020-09-11 16:10     ` Andrew Cooper
