All of lore.kernel.org
 help / color / mirror / Atom feed
From: Juergen Gross <jgross@suse.com>
To: xen-devel@lists.xenproject.org
Cc: Juergen Gross <jgross@suse.com>,
	andrew.cooper3@citrix.com, wei.liu2@citrix.com,
	jbeulich@suse.com, dfaggioli@suse.com
Subject: [PATCH v2 4/6] xen/x86: disable global pages for domains with XPTI active
Date: Fri,  2 Mar 2018 09:14:01 +0100	[thread overview]
Message-ID: <20180302081403.16953-5-jgross@suse.com> (raw)
In-Reply-To: <20180302081403.16953-1-jgross@suse.com>

Instead of flushing the TLB from global pages when switching address
spaces with XPTI being active just disable global pages via %cr4
completely when a domain subject to XPTI is active. This avoids the
need for extra TLB flushes as loading %cr3 will remove all TLB
entries.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/arch/x86/cpu/mtrr/generic.c | 32 +++++++++++++++++++++-----------
 xen/arch/x86/flushtlb.c         | 39 +++++++++++++++++++++++++--------------
 xen/arch/x86/x86_64/entry.S     | 10 ----------
 xen/include/asm-x86/domain.h    |  3 ++-
 4 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/xen/arch/x86/cpu/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c
index e9c0e5e059..d705138100 100644
--- a/xen/arch/x86/cpu/mtrr/generic.c
+++ b/xen/arch/x86/cpu/mtrr/generic.c
@@ -400,8 +400,10 @@ static DEFINE_SPINLOCK(set_atomicity_lock);
  * has been called.
  */
 
-static void prepare_set(void)
+static bool prepare_set(void)
 {
+	unsigned long cr4;
+
 	/*  Note that this is not ideal, since the cache is only flushed/disabled
 	   for this CPU while the MTRRs are changed, but changing this requires
 	   more invasive changes to the way the kernel boots  */
@@ -412,18 +414,22 @@ static void prepare_set(void)
 	write_cr0(read_cr0() | X86_CR0_CD);
 	wbinvd();
 
-	/*  TLB flushing here relies on Xen always using CR4.PGE. */
-	BUILD_BUG_ON(!(XEN_MINIMAL_CR4 & X86_CR4_PGE));
-	write_cr4(read_cr4() & ~X86_CR4_PGE);
+	cr4 = read_cr4();
+	if (cr4 & X86_CR4_PGE)
+		write_cr4(cr4 & ~X86_CR4_PGE);
+	else
+		asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
 	/*  Save MTRR state */
 	rdmsrl(MSR_MTRRdefType, deftype);
 
 	/*  Disable MTRRs, and set the default type to uncached  */
 	mtrr_wrmsr(MSR_MTRRdefType, deftype & ~0xcff);
+
+	return !!(cr4 & X86_CR4_PGE);
 }
 
-static void post_set(void)
+static void post_set(bool pge)
 {
 	/* Intel (P6) standard MTRRs */
 	mtrr_wrmsr(MSR_MTRRdefType, deftype);
@@ -432,7 +438,10 @@ static void post_set(void)
 	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/*  Reenable CR4.PGE (also flushes the TLB) */
-	write_cr4(read_cr4() | X86_CR4_PGE);
+	if (pge)
+		write_cr4(read_cr4() | X86_CR4_PGE);
+	else
+		asm volatile( "mov %0, %%cr3" : : "r" (read_cr3()) : "memory" );
 
 	spin_unlock(&set_atomicity_lock);
 }
@@ -441,14 +450,15 @@ static void generic_set_all(void)
 {
 	unsigned long mask, count;
 	unsigned long flags;
+	bool pge;
 
 	local_irq_save(flags);
-	prepare_set();
+	pge = prepare_set();
 
 	/* Actually set the state */
 	mask = set_mtrr_state();
 
-	post_set();
+	post_set(pge);
 	local_irq_restore(flags);
 
 	/*  Use the atomic bitops to update the global mask  */
@@ -457,7 +467,6 @@ static void generic_set_all(void)
 			set_bit(count, &smp_changes_mask);
 		mask >>= 1;
 	}
-	
 }
 
 static void generic_set_mtrr(unsigned int reg, unsigned long base,
@@ -474,11 +483,12 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 {
 	unsigned long flags;
 	struct mtrr_var_range *vr;
+	bool pge;
 
 	vr = &mtrr_state.var_ranges[reg];
 
 	local_irq_save(flags);
-	prepare_set();
+	pge = prepare_set();
 
 	if (size == 0) {
 		/* The invalid bit is kept in the mask, so we simply clear the
@@ -499,7 +509,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 		mtrr_wrmsr(MSR_IA32_MTRR_PHYSMASK(reg), vr->mask);
 	}
 
-	post_set();
+	post_set(pge);
 	local_irq_restore(flags);
 }
 
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index e4ea4f3297..186d9099f6 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -72,20 +72,39 @@ static void post_flush(u32 t)
     this_cpu(tlbflush_time) = t;
 }
 
+static void do_flush_tlb(unsigned long cr3)
+{
+    unsigned long cr4;
+
+    cr4 = read_cr4();
+    if ( cr4 & X86_CR4_PGE )
+    {
+        write_cr4(cr4 & ~X86_CR4_PGE);
+        if ( cr3 )
+            asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+        else
+            barrier();
+        write_cr4(cr4);
+    }
+    else
+    {
+        if ( !cr3 )
+            cr3 = read_cr3();
+        asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+    }
+}
+
 void write_cr3(unsigned long cr3)
 {
-    unsigned long flags, cr4;
+    unsigned long flags;
     u32 t;
 
     /* This non-reentrant function is sometimes called in interrupt context. */
     local_irq_save(flags);
 
     t = pre_flush();
-    cr4 = read_cr4();
 
-    write_cr4(cr4 & ~X86_CR4_PGE);
-    asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
-    write_cr4(cr4);
+    do_flush_tlb(cr3);
 
     post_flush(t);
 
@@ -123,22 +142,14 @@ unsigned int flush_area_local(const void *va, unsigned int flags)
             u32 t = pre_flush();
 
             if ( !cpu_has_invpcid )
-            {
-                unsigned long cr4 = read_cr4();
-
-                write_cr4(cr4 & ~X86_CR4_PGE);
-                barrier();
-                write_cr4(cr4);
-            }
+                do_flush_tlb(0);
             else
-            {
                 /*
                  * Using invpcid to flush all mappings works
                  * regardless of whether PCID is enabled or not.
                  * It is faster than read-modify-write CR4.
                  */
                 invpcid_flush_all();
-            }
 
             post_flush(t);
         }
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index cdcdc2c40a..a8d38e7eb2 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -73,13 +73,8 @@ restore_all_guest:
                 ROOT_PAGETABLE_LAST_XEN_SLOT - 1) * 8, %rdi
         rep movsq
 .Lrag_copy_done:
-        mov   STACK_CPUINFO_FIELD(cr4)(%rdx), %rdi
         mov   %r9, STACK_CPUINFO_FIELD(xen_cr3)(%rdx)
-        mov   %rdi, %rsi
-        and   $~X86_CR4_PGE, %rdi
-        mov   %rdi, %cr4
         mov   %rax, %cr3
-        mov   %rsi, %cr4
 .Lrag_cr3_end:
         ALTERNATIVE_NOP .Lrag_cr3_start, .Lrag_cr3_end, X86_FEATURE_NO_XPTI
 
@@ -136,12 +131,7 @@ restore_all_xen:
          * so "g" will have to do.
          */
 UNLIKELY_START(g, exit_cr3)
-        mov   %cr4, %rdi
-        mov   %rdi, %rsi
-        and   $~X86_CR4_PGE, %rdi
-        mov   %rdi, %cr4
         mov   %rax, %cr3
-        mov   %rsi, %cr4
 UNLIKELY_END(exit_cr3)
 
         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 0cc37dea05..316418a6fe 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -622,7 +622,8 @@ unsigned long pv_guest_cr4_fixup(const struct vcpu *, unsigned long guest_cr4);
             X86_CR4_SMAP | X86_CR4_OSXSAVE |                \
             X86_CR4_FSGSBASE))                              \
       | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0))         \
-     & ~X86_CR4_DE)
+     & ~(X86_CR4_DE |                                       \
+         ((v)->domain->arch.pv_domain.xpti ? X86_CR4_PGE : 0)))
 #define real_cr4_to_pv_guest_cr4(c)                         \
     ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD |      \
              X86_CR4_OSXSAVE | X86_CR4_SMEP |               \
-- 
2.13.6


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  parent reply	other threads:[~2018-03-02  8:14 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-02  8:13 [PATCH v2 0/6] xen/x86: various XPTI speedups Juergen Gross
2018-03-02  8:13 ` [PATCH v2 1/6] x86/xpti: avoid copying L4 page table contents when possible Juergen Gross
2018-03-05 16:43   ` Jan Beulich
2018-03-08 11:59     ` Juergen Gross
2018-03-08 12:47       ` Jan Beulich
     [not found]       ` <5AA13EEA02000078001AFCAF@suse.com>
2018-03-08 13:03         ` Juergen Gross
     [not found]   ` <5A9D81DC02000078001AEB68@suse.com>
2018-03-06  7:01     ` Juergen Gross
2018-03-06  7:58       ` Jan Beulich
     [not found]       ` <5A9E583002000078001AED3A@suse.com>
2018-03-06  8:06         ` Juergen Gross
2018-03-06  8:17           ` Jan Beulich
2018-03-02  8:13 ` [PATCH v2 2/6] x86/xpti: don't flush TLB twice when switching to 64-bit pv context Juergen Gross
2018-03-05 16:49   ` Jan Beulich
     [not found]   ` <5A9D831F02000078001AEB7E@suse.com>
2018-03-06  7:02     ` Juergen Gross
2018-03-02  8:14 ` [PATCH v2 3/6] xen/x86: support per-domain flag for xpti Juergen Gross
2018-03-08 10:17   ` Jan Beulich
     [not found]   ` <5AA11BDE02000078001AFB92@suse.com>
2018-03-08 11:30     ` Juergen Gross
2018-03-08 12:49       ` Jan Beulich
     [not found]       ` <5AA13F7D02000078001AFCB3@suse.com>
2018-03-08 13:13         ` Juergen Gross
2018-03-02  8:14 ` Juergen Gross [this message]
2018-03-02 11:03   ` [PATCH v2 4/6] xen/x86: disable global pages for domains with XPTI active Wei Liu
2018-03-02 11:30     ` Juergen Gross
2018-03-08 13:38   ` Jan Beulich
2018-03-09  3:01     ` Tian, Kevin
2018-03-09  5:23     ` Tian, Kevin
2018-03-09  8:34       ` Jan Beulich
     [not found]       ` <5AA2551002000078001B0116@suse.com>
2018-03-09  8:42         ` Juergen Gross
     [not found]   ` <5AA14AF302000078001AFD30@suse.com>
2018-03-08 14:05     ` Juergen Gross
2018-03-08 14:33       ` Jan Beulich
     [not found]       ` <5AA157E002000078001AFDA4@suse.com>
2018-03-08 14:39         ` Juergen Gross
2018-03-08 15:06   ` Jan Beulich
2018-03-09 14:40     ` Juergen Gross
2018-03-09 15:30       ` Jan Beulich
2018-03-02  8:14 ` [PATCH v2 5/6] xen/x86: use flag byte for decision whether xen_cr3 is valid Juergen Gross
2018-03-08 14:24   ` Jan Beulich
     [not found]   ` <5AA155BE02000078001AFD89@suse.com>
2018-03-08 14:28     ` Juergen Gross
2018-03-02  8:14 ` [PATCH v2 6/6] xen/x86: use PCID feature for XPTI Juergen Gross
2018-03-08 15:27   ` Jan Beulich
2018-03-05 16:20 ` [PATCH v2 0/6] xen/x86: various XPTI speedups Dario Faggioli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180302081403.16953-5-jgross@suse.com \
    --to=jgross@suse.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=dfaggioli@suse.com \
    --cc=jbeulich@suse.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.