All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Woodhouse <dwmw2@infradead.org>
To: kvm <kvm@vger.kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Joao Martins <joao.m.martins@oracle.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	"jmattson@google.com" <jmattson@google.com>,
	"wanpengli@tencent.com" <wanpengli@tencent.com>,
	"seanjc@google.com" <seanjc@google.com>,
	"vkuznets@redhat.com" <vkuznets@redhat.com>,
	"mtosatti@redhat.com" <mtosatti@redhat.com>,
	"joro@8bytes.org" <joro@8bytes.org>,
	karahmed@amazon.com
Subject: [PATCH v2] KVM: x86: Fix recording of guest steal time / preempted status
Date: Tue, 02 Nov 2021 16:38:53 +0000	[thread overview]
Message-ID: <4369bbef7f0c2b239da419c917f9a9f2ca6a76f1.camel@infradead.org> (raw)
In-Reply-To: <5d4002373c3ae614cb87b72ba5b7cdc161a0cd46.camel@infradead.org>

[-- Attachment #1: Type: text/plain, Size: 8103 bytes --]

From: David Woodhouse <dwmw@amazon.co.uk>

In commit b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is
not missed") we switched to using a gfn_to_pfn_cache for accessing the
guest steal time structure in order to allow for an atomic xchg of the
preempted field. This has a couple of problems.

Firstly, kvm_map_gfn() doesn't work at all for IOMEM pages when the
atomic flag is set, which it is in kvm_steal_time_set_preempted(). So a
guest vCPU using an IOMEM page for its steal time would never have its
preempted field set.

Secondly, the gfn_to_pfn_cache is not invalidated in all cases where it
should have been. There are two stages to the GFN → PFN conversion;
first the GFN is converted to a userspace HVA, and then that HVA is
looked up in the process page tables to find the underlying host PFN.
Correct invalidation of the latter would require being hooked up to the
MMU notifiers, but that doesn't happen — so it just keeps mapping and
unmapping the *wrong* PFN after the userspace page tables change.

In the !IOMEM case at least the stale page *is* pinned all the time it's
cached, so it won't be freed and reused by anyone else while still
receiving the steal time updates. (This kind of makes a mockery of this
repeated map/unmap dance which I thought was supposed to avoid pinning
the page. AFAICT we might as well have just kept a kernel mapping of it
all the time).

But there's no point in a kernel mapping of it anyway, when in all cases
we care about, we have a perfectly serviceable userspace HVA for it. We
just need to implement the atomic xchg on the userspace address with
appropriate exception handling, which is fairly trivial.

Cc: stable@vger.kernel.org
Fixes: b043138246a4 ("x86/KVM: Make sure KVM_VCPU_FLUSH_TLB flag is not missed")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
v2: Fix asm constraints (err is an output). Rebase so that it applies cleanly
    before the Xen series (which changes the argument to kvm_map_gfn() that
    is removed in this patch anyway.)

 
 arch/x86/include/asm/kvm_host.h |   2 +-
 arch/x86/kvm/x86.c              | 107 +++++++++++++++++++++++---------
 2 files changed, 78 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 13f64654dfff..750f74da9793 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -751,7 +751,7 @@ struct kvm_vcpu_arch {
 		u8 preempted;
 		u64 msr_val;
 		u64 last_steal;
-		struct gfn_to_pfn_cache cache;
+		struct gfn_to_hva_cache cache;
 	} st;
 
 	u64 l1_tsc_offset;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bfe0de3008a6..e6905a1068ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3195,8 +3195,11 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
-	struct kvm_host_map map;
-	struct kvm_steal_time *st;
+	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+	struct kvm_steal_time __user *st;
+	struct kvm_memslots *slots;
+	u64 steal;
+	u32 version;
 
 	if (kvm_xen_msr_enabled(vcpu->kvm)) {
 		kvm_xen_runstate_set_running(vcpu);
@@ -3206,47 +3209,87 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
 
-	/* -EAGAIN is returned in atomic context so we can just return. */
-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
-			&map, &vcpu->arch.st.cache, false))
+	if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
 		return;
 
-	st = map.hva +
-		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+	slots = kvm_memslots(vcpu->kvm);
+
+	if (unlikely(slots->generation != ghc->generation ||
+		     kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
+		gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+
+		/* We rely on the fact that it fits in a single page. */
+		BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
+
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+		    kvm_is_error_hva(ghc->hva) || !ghc->memslot)
+			return;
+	}
+
+	st = (struct kvm_steal_time __user *)ghc->hva;
+	if (!user_access_begin(st, sizeof(*st)))
+		return;
 
 	/*
 	 * Doing a TLB flush here, on the guest's behalf, can avoid
 	 * expensive IPIs.
 	 */
 	if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
-		u8 st_preempted = xchg(&st->preempted, 0);
+		u8 st_preempted = 0;
+		int err;
+
+		asm volatile("1:\t" LOCK_PREFIX "xchgb %0, %2\n"
+			     "\txor %1, %1\n"
+			     "2:\n"
+			     "\t.section .fixup,\"ax\"\n"
+			     "3:\tmovl %3, %1\n"
+			     "\tjmp\t2b\n"
+			     "\t.previous\n"
+			     _ASM_EXTABLE_UA(1b, 3b)
+			     : "=r" (st_preempted),
+			       "=r" (err)
+			     : "m" (st->preempted),
+			       "i" (-EFAULT),
+			       "0" (st_preempted));
+		if (err)
+			goto out;
+
+		user_access_end();
+
+		vcpu->arch.st.preempted = 0;
 
 		trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
 				       st_preempted & KVM_VCPU_FLUSH_TLB);
 		if (st_preempted & KVM_VCPU_FLUSH_TLB)
 			kvm_vcpu_flush_tlb_guest(vcpu);
+
+		if (!user_access_begin(st, sizeof(*st)))
+			return;
 	} else {
-		st->preempted = 0;
+		unsafe_put_user(0, &st->preempted, out);
+		vcpu->arch.st.preempted = 0;
 	}
 
-	vcpu->arch.st.preempted = 0;
-
-	if (st->version & 1)
-		st->version += 1;  /* first time write, random junk */
+	unsafe_get_user(version, &st->version, out);
+	if (version & 1)
+		version += 1;  /* first time write, random junk */
 
-	st->version += 1;
+	version += 1;
+	unsafe_put_user(version, &st->version, out);
 
 	smp_wmb();
 
-	st->steal += current->sched_info.run_delay -
+	unsafe_get_user(steal, &st->steal, out);
+	steal += current->sched_info.run_delay -
 		vcpu->arch.st.last_steal;
 	vcpu->arch.st.last_steal = current->sched_info.run_delay;
+	unsafe_put_user(steal, &st->steal, out);
 
-	smp_wmb();
-
-	st->version += 1;
+	version += 1;
+	unsafe_put_user(version, &st->version, out);
 
-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ out:
+	user_access_end();
 }
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -4285,8 +4328,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 {
-	struct kvm_host_map map;
-	struct kvm_steal_time *st;
+	struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+	struct kvm_steal_time __user *st;
+	struct kvm_memslots *slots;
+	static const u8 preempted = KVM_VCPU_PREEMPTED;
 
 	if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
 		return;
@@ -4294,16 +4339,21 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 	if (vcpu->arch.st.preempted)
 		return;
 
-	if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
-			&vcpu->arch.st.cache, true))
+	/* This happens on process exit */
+	if (unlikely(current->mm != vcpu->kvm->mm))
 		return;
 
-	st = map.hva +
-		offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+	slots = kvm_memslots(vcpu->kvm);
 
-	st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+	if (unlikely(slots->generation != ghc->generation ||
+		     kvm_is_error_hva(ghc->hva) || !ghc->memslot))
+		return;
 
-	kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+	st = (struct kvm_steal_time __user *)ghc->hva;
+	BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
+
+	if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
+		vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -10817,11 +10867,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-	struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
 	int idx;
 
-	kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
 	kvmclock_reset(vcpu);
 
 	static_call(kvm_x86_vcpu_free)(vcpu);
-- 
2.25.1


[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5174 bytes --]

  reply	other threads:[~2021-11-02 16:39 UTC|newest]

Thread overview: 103+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-01 14:09 [PATCH] KVM: x86: Fix recording of guest steal time / preempted status David Woodhouse
2021-11-02 16:38 ` David Woodhouse [this message]
2021-11-02 17:01   ` [PATCH v2] " Paolo Bonzini
2021-11-02 17:11     ` David Woodhouse
2021-11-02 17:19       ` Paolo Bonzini
2021-11-02 17:26         ` David Woodhouse
2021-11-02 17:36         ` [PATCH v3] " David Woodhouse
2021-11-11 13:23           ` Paolo Bonzini
2021-11-12  8:28             ` David Woodhouse
2021-11-12  9:31               ` Paolo Bonzini
2021-11-12  9:54                 ` David Woodhouse
2021-11-12 10:49                   ` Paolo Bonzini
2021-11-12 11:29                     ` David Woodhouse
2021-11-12 12:27                       ` Paolo Bonzini
2021-11-12 13:28                         ` David Woodhouse
2021-11-12 14:56                           ` Paolo Bonzini
2021-11-12 15:27                             ` David Woodhouse
2021-11-15 16:47                             ` [RFC PATCH 0/11] Rework gfn_to_pfn_cache David Woodhouse
2021-11-15 16:50                               ` [PATCH 01/11] KVM: x86: Fix steal time asm constraints in 32-bit mode David Woodhouse
2021-11-15 16:50                                 ` [PATCH 02/11] KVM: x86/xen: Fix get_attr of KVM_XEN_ATTR_TYPE_SHARED_INFO David Woodhouse
2021-11-15 16:50                                 ` [PATCH 03/11] KVM: selftests: Add event channel upcall support to xen_shinfo_test David Woodhouse
2021-11-15 16:50                                 ` [PATCH 04/11] KVM: x86/xen: Use sizeof_field() instead of open-coding it David Woodhouse
2021-11-15 16:50                                 ` [PATCH 05/11] KVM: nVMX: Use kvm_{read,write}_guest_cached() for shadow_vmcs12 David Woodhouse
2021-11-15 16:50                                 ` [PATCH 06/11] KVM: nVMX: Use kvm_read_guest_offset_cached() for nested VMCS check David Woodhouse
2021-11-15 16:50                                 ` [PATCH 07/11] KVM: nVMX: Use a gfn_to_hva_cache for vmptrld David Woodhouse
2021-11-15 16:50                                 ` [PATCH 08/11] KVM: Kill kvm_map_gfn() / kvm_unmap_gfn() and gfn_to_pfn_cache David Woodhouse
2021-11-16 10:21                                   ` Paolo Bonzini
2021-11-17 17:18                                     ` David Woodhouse
2021-11-15 16:50                                 ` [PATCH 09/11] KVM: Reinstate gfn_to_pfn_cache with invalidation support David Woodhouse
2021-11-15 16:50                                 ` [PATCH 10/11] KVM: x86/xen: Maintain valid mapping of Xen shared_info page David Woodhouse
2021-11-15 16:50                                 ` [PATCH 11/11] KVM: x86/xen: Add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery David Woodhouse
2021-11-15 17:02                                   ` David Woodhouse
2021-11-15 18:49                                   ` Paolo Bonzini
2021-11-15 18:55                                     ` David Woodhouse
2021-11-15 18:50                               ` [RFC PATCH 0/11] Rework gfn_to_pfn_cache Paolo Bonzini
2021-11-15 19:11                                 ` David Woodhouse
2021-11-15 19:26                                   ` Paolo Bonzini
2021-11-15 22:59                                     ` Sean Christopherson
2021-11-15 23:22                                       ` David Woodhouse
2021-11-16 13:17                                         ` David Woodhouse
2021-11-16 14:11                                           ` Paolo Bonzini
2021-11-16 14:25                                             ` David Woodhouse
2021-11-16 14:57                                               ` Paolo Bonzini
2021-11-16 15:09                                                 ` David Woodhouse
2021-11-16 15:49                                                   ` Paolo Bonzini
2021-11-16 16:06                                                     ` David Woodhouse
2021-11-16 17:42                                                       ` Paolo Bonzini
2021-11-16 17:57                                                         ` David Woodhouse
2021-11-16 18:46                                                           ` Paolo Bonzini
2021-11-16 19:34                                                             ` David Woodhouse
2021-11-15 23:24                                       ` David Woodhouse
2021-11-16 11:50                                     ` [PATCH 0/7] KVM: Add Makefile.kvm for common files David Woodhouse
2021-11-16 11:50                                       ` David Woodhouse
2021-11-16 11:50                                       ` David Woodhouse
2021-11-16 11:50                                       ` David Woodhouse
2021-11-16 11:50                                       ` [PATCH 1/7] KVM: Introduce CONFIG_HAVE_KVM_DIRTY_RING David Woodhouse
2021-11-16 11:50                                         ` David Woodhouse
2021-11-16 11:50                                         ` David Woodhouse
2021-11-16 11:50                                         ` David Woodhouse
2021-11-16 11:50                                         ` [PATCH 2/7] KVM: Add Makefile.kvm for common files, use it for x86 David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                         ` [PATCH 3/7] KVM: s390: Use Makefile.kvm for common files David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-17  7:29                                           ` Christian Borntraeger
2021-11-17  7:29                                             ` Christian Borntraeger
2021-11-17  7:29                                             ` Christian Borntraeger
2021-11-17  7:29                                             ` Christian Borntraeger
2021-11-16 11:50                                         ` [PATCH 4/7] KVM: mips: " David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                         ` [PATCH 5/7] KVM: RISC-V: " David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                         ` [PATCH 6/7] KVM: powerpc: " David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 18:43                                           ` Sean Christopherson
2021-11-16 18:43                                             ` Sean Christopherson
2021-11-16 18:43                                             ` Sean Christopherson
2021-11-16 18:43                                             ` Sean Christopherson
2021-11-16 19:13                                             ` David Woodhouse
2021-11-16 19:13                                               ` David Woodhouse
2021-11-16 19:13                                               ` David Woodhouse
2021-11-16 19:13                                               ` David Woodhouse
2021-11-16 11:50                                         ` [PATCH 7/7] KVM: arm64: " David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-16 11:50                                           ` David Woodhouse
2021-11-15 21:38                                 ` [RFC PATCH 0/11] Rework gfn_to_pfn_cache David Woodhouse
2021-11-12 19:44                 ` [PATCH v3] KVM: x86: Fix recording of guest steal time / preempted status David Woodhouse
2021-11-03  9:47         ` [PATCH v2] " David Woodhouse
2021-11-03 12:35           ` Paolo Bonzini
2021-11-03 12:56             ` David Woodhouse
2021-11-03 13:05               ` Paolo Bonzini
2021-11-03 13:23                 ` David Woodhouse
2021-11-03 13:34                 ` David Woodhouse

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4369bbef7f0c2b239da419c917f9a9f2ca6a76f1.camel@infradead.org \
    --to=dwmw2@infradead.org \
    --cc=boris.ostrovsky@oracle.com \
    --cc=jmattson@google.com \
    --cc=joao.m.martins@oracle.com \
    --cc=joro@8bytes.org \
    --cc=karahmed@amazon.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.