linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andy Lutomirski <luto@kernel.org>
To: X86 ML <x86@kernel.org>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"Borislav Petkov" <bpetkov@suse.de>,
	"Linus Torvalds" <torvalds@linux-foundation.org>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Mel Gorman" <mgorman@suse.de>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"Nadav Amit" <nadav.amit@gmail.com>,
	"Andy Lutomirski" <luto@kernel.org>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	kvm@vger.kernel.org, "Rik van Riel" <riel@redhat.com>,
	"Dave Hansen" <dave.hansen@intel.com>,
	"Nadav Amit" <namit@vmware.com>, "Michal Hocko" <mhocko@suse.com>,
	"Arjan van de Ven" <arjan@linux.intel.com>
Subject: [PATCH v2 11/11] x86,kvm: Teach KVM's VMX code that CR3 isn't a constant
Date: Mon, 22 May 2017 15:30:11 -0700	[thread overview]
Message-ID: <fdb15ba0e022c5b114dec631494753d1df48859c.1495492063.git.luto@kernel.org> (raw)
In-Reply-To: <cover.1495492063.git.luto@kernel.org>

When PCID is enabled, CR3's PCID bits can change during context
switches, so KVM won't be able to treat CR3 as a per-mm constant any
more.

I structured this like the existing CR4 handling.  Under ordinary
circumstances (PCID disabled, or the current PCID and the value
that's already in the VMCS match), we won't do an extra VMCS
write, and we'll never do an extra direct CR3 read.  The overhead
should be minimal.

I disallowed using the new helper in non-atomic context because
PCID support will cause CR3 to stop being constant in non-atomic
process context.

(Frankly, it also scares me a bit that KVM ever treated CR3 as
constant, but it looks like it was okay before.)

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: kvm@vger.kernel.org
Cc: Rik van Riel <riel@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Nadav Amit <namit@vmware.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/include/asm/mmu_context.h | 19 +++++++++++++++++++
 arch/x86/kvm/vmx.c                 | 21 ++++++++++++++++++---
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 187c39470a0b..f20d7ea47095 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -266,4 +266,23 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
 	return __pkru_allows_pkey(vma_pkey(vma), write);
 }
 
+
+/*
+ * This can be used from atomic (non-NMI) context to figure out what the
+ * value of CR3 is without needing to do a (slow) read_cr3().
+ *
+ * It's intended to be used for code like KVM that sneakily changes CR3
+ * and needs to restore it.  It needs to be used very carefully.
+ */
+static inline unsigned long __get_current_cr3_fast(void)
+{
+	unsigned long cr3 = __pa(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd);
+
+	/* For now, be very restrictive about when this can be called. */
+	VM_WARN_ON(in_nmi() || !in_atomic());
+
+	VM_BUG_ON(cr3 != read_cr3());
+	return cr3;
+}
+
 #endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 535cc065b844..2771235072aa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -48,6 +48,7 @@
 #include <asm/kexec.h>
 #include <asm/apic.h>
 #include <asm/irq_remapping.h>
+#include <asm/mmu_context.h>
 
 #include "trace.h"
 #include "pmu.h"
@@ -596,6 +597,7 @@ struct vcpu_vmx {
 		int           gs_ldt_reload_needed;
 		int           fs_reload_needed;
 		u64           msr_host_bndcfgs;
+		unsigned long vmcs_host_cr3;	/* May not match real cr3 */
 		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
 	} host_state;
 	struct {
@@ -5017,12 +5019,19 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
 	u32 low32, high32;
 	unsigned long tmpl;
 	struct desc_ptr dt;
-	unsigned long cr0, cr4;
+	unsigned long cr0, cr3, cr4;
 
 	cr0 = read_cr0();
 	WARN_ON(cr0 & X86_CR0_TS);
 	vmcs_writel(HOST_CR0, cr0);  /* 22.2.3 */
-	vmcs_writel(HOST_CR3, read_cr3());  /* 22.2.3  FIXME: shadow tables */
+
+	/*
+	 * Save the most likely value for this task's CR3 in the VMCS.
+	 * We can't use __get_current_cr3_fast() because we're not atomic.
+	 */
+	cr3 = read_cr3();
+	vmcs_writel(HOST_CR3, cr3);		/* 22.2.3  FIXME: shadow tables */
+	vmx->host_state.vmcs_host_cr3 = cr3;
 
 	/* Save the most likely value for this task's CR4 in the VMCS. */
 	cr4 = cr4_read_shadow();
@@ -8927,7 +8936,7 @@ static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	unsigned long debugctlmsr, cr4;
+	unsigned long debugctlmsr, cr3, cr4;
 
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
@@ -8953,6 +8962,12 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
 		vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);
 
+	cr3 = __get_current_cr3_fast();
+	if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) {
+		vmcs_writel(HOST_CR3, cr3);
+		vmx->host_state.vmcs_host_cr3 = cr3;
+	}
+
 	cr4 = cr4_read_shadow();
 	if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
 		vmcs_writel(HOST_CR4, cr4);
-- 
2.9.3

      parent reply	other threads:[~2017-05-22 22:30 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-05-22 22:30 [PATCH v2 00/11] x86 TLB flush cleanups, moving toward PCID support Andy Lutomirski
2017-05-22 22:30 ` [PATCH v2 01/11] x86/mm: Reimplement flush_tlb_page() using flush_tlb_mm_range() Andy Lutomirski
2017-05-24 10:21   ` [tip:x86/mm] " tip-bot for Andy Lutomirski
2017-05-22 22:30 ` [PATCH v2 02/11] x86/mm: Reduce indentation in flush_tlb_func() Andy Lutomirski
2017-05-24 10:22   ` [tip:x86/mm] " tip-bot for Andy Lutomirski
2017-05-22 22:30 ` [PATCH v2 03/11] x86/mm: Make the batched unmap TLB flush API more generic Andy Lutomirski
2017-05-24 10:22   ` [tip:x86/mm] mm, " tip-bot for Andy Lutomirski
2017-05-22 22:30 ` [PATCH v2 04/11] x86/mm: Pass flush_tlb_info to flush_tlb_others() etc Andy Lutomirski
2017-05-24  8:18   ` Ingo Molnar
2017-05-22 22:30 ` [PATCH v2 05/11] x86/mm: Change the leave_mm() condition for local TLB flushes Andy Lutomirski
2017-05-22 22:30 ` [PATCH v2 06/11] x86/mm: Refactor flush_tlb_mm_range() to merge local and remote cases Andy Lutomirski
2017-05-22 22:30 ` [PATCH v2 07/11] x86/mm: Use new merged flush logic in arch_tlbbatch_flush() Andy Lutomirski
2017-05-22 22:30 ` [PATCH v2 08/11] x86/mm: Remove the UP tlbflush code; always use the formerly SMP code Andy Lutomirski
2017-05-22 22:30 ` [PATCH v2 09/11] x86/mm: Rework lazy TLB to track the actual loaded mm Andy Lutomirski
2017-05-22 22:30 ` [PATCH v2 10/11] x86/mm: Be more consistent wrt PAGE_SHIFT vs PAGE_SIZE in tlb flush code Andy Lutomirski
2017-05-22 23:24   ` Nadav Amit
2017-05-22 23:41     ` Andy Lutomirski
2017-05-22 22:30 ` Andy Lutomirski [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fdb15ba0e022c5b114dec631494753d1df48859c.1495492063.git.luto@kernel.org \
    --to=luto@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=arjan@linux.intel.com \
    --cc=bpetkov@suse.de \
    --cc=dave.hansen@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=nadav.amit@gmail.com \
    --cc=namit@vmware.com \
    --cc=pbonzini@redhat.com \
    --cc=riel@redhat.com \
    --cc=rkrcmar@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).