From: Ben Gardon <bgardon@google.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>, Peter Xu <peterx@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Peter Shier <pshier@google.com>,
	Yulei Zhang <yulei.kernel@gmail.com>,
	Wanpeng Li <kernellwp@gmail.com>,
	Xiao Guangrong <xiaoguangrong.eric@gmail.com>,
	Kai Huang <kai.huang@intel.com>,
	Keqian Zhu <zhukeqian1@huawei.com>,
	David Hildenbrand <david@redhat.com>,
	Ben Gardon <bgardon@google.com>
Subject: [PATCH v4 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps
Date: Tue, 11 May 2021 10:16:10 -0700
Message-ID: <20210511171610.170160-8-bgardon@google.com>
In-Reply-To: <20210511171610.170160-1-bgardon@google.com>

If the TDP MMU is in use, wait to allocate the rmaps until the shadow
MMU is actually used (i.e. when a nested VM is launched). This saves
memory equal to roughly 0.2% of guest memory in cases where the TDP MMU
is used and there are no nested guests involved.
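
The 0.2% figure is dominated by the 4K-level rmap: roughly one 8-byte
rmap head per 4 KiB of guest memory (8 / 4096), with the 2M and 1G
levels adding comparatively little on top.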

Signed-off-by: Ben Gardon <bgardon@google.com>
---
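For reviewers, a condensed sketch of the ordering scheme this patch
relies on (not part of the commit; lazily_alloc_rmaps() and
alloc_rmaps_for_all_slots() are illustrative stand-ins for
alloc_all_memslots_rmaps() and the allocation loop added in x86.c):

	/* Writer side: first use of the shadow MMU, e.g. a nested VM launch. */
	int lazily_alloc_rmaps(struct kvm *kvm)
	{
		int r;

		/* Cheap lockless check before taking slots_arch_lock. */
		if (smp_load_acquire(&kvm->arch.memslots_have_rmaps))
			return 0;

		mutex_lock(&kvm->slots_arch_lock);
		/* Re-check under the lock in case another vCPU won the race. */
		if (kvm->arch.memslots_have_rmaps) {
			r = 0;
			goto out_unlock;
		}

		r = alloc_rmaps_for_all_slots(kvm);
		if (r)
			goto out_unlock;

		/* Publish the rmap pointers before the flag advertising them. */
		smp_store_release(&kvm->arch.memslots_have_rmaps, true);
	out_unlock:
		mutex_unlock(&kvm->slots_arch_lock);
		return r;
	}

	/* Reader side: pairs with the store-release above. */
	bool rmaps_ready(struct kvm *kvm)
	{
		/* Read memslots_have_rmaps before the rmaps themselves. */
		return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
	}

The acquire/release pairing guarantees that any thread which sees
memslots_have_rmaps == true also sees the fully populated rmap arrays,
which is what lets the hot paths in mmu.c check the flag without taking
any lock.
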
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/mmu/mmu.c          | 53 +++++++++++++++++++++++----------
 arch/x86/kvm/mmu/tdp_mmu.c      |  6 ++--
 arch/x86/kvm/mmu/tdp_mmu.h      |  4 +--
 arch/x86/kvm/x86.c              | 45 +++++++++++++++++++++++++++-
 5 files changed, 89 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fc75ed49bfee..7b65f82ade1c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1868,4 +1868,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 
 int kvm_cpu_dirty_log_size(void);
 
+int alloc_all_memslots_rmaps(struct kvm *kvm);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b0bdb924d519..183afccd2944 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1190,7 +1190,8 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 		kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
 				slot->base_gfn + gfn_offset, mask, true);
 
-	if (!kvm->arch.memslots_have_rmaps)
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (!smp_load_acquire(&kvm->arch.memslots_have_rmaps))
 		return;
 
 	while (mask) {
@@ -1223,7 +1224,8 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 		kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
 				slot->base_gfn + gfn_offset, mask, false);
 
-	if (!kvm->arch.memslots_have_rmaps)
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (!smp_load_acquire(&kvm->arch.memslots_have_rmaps))
 		return;
 
 	while (mask) {
@@ -1268,7 +1270,8 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 	int i;
 	bool write_protected = false;
 
-	if (kvm->arch.memslots_have_rmaps) {
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps)) {
 		for (i = PG_LEVEL_4K; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
 			rmap_head = __gfn_to_rmap(gfn, i, slot);
 			write_protected |= __rmap_write_protect(kvm, rmap_head,
@@ -1446,7 +1449,8 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	bool flush = false;
 
-	if (kvm->arch.memslots_have_rmaps)
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps))
 		flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
@@ -1459,7 +1463,8 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	bool flush = false;
 
-	if (kvm->arch.memslots_have_rmaps)
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps))
 		flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
@@ -1515,7 +1520,8 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	bool young = false;
 
-	if (kvm->arch.memslots_have_rmaps)
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps))
 		young = kvm_handle_gfn_range(kvm, range, kvm_age_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
@@ -1528,7 +1534,8 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	bool young = false;
 
-	if (kvm->arch.memslots_have_rmaps)
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps))
 		young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
@@ -3295,6 +3302,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	r = alloc_all_memslots_rmaps(vcpu->kvm);
+	if (r)
+		return r;
+
 	write_lock(&vcpu->kvm->mmu_lock);
 	r = make_mmu_pages_available(vcpu);
 	if (r < 0)
@@ -5455,7 +5466,8 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 	 */
 	kvm_reload_remote_mmus(kvm);
 
-	if (kvm->arch.memslots_have_rmaps)
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps))
 		kvm_zap_obsolete_pages(kvm);
 
 	write_unlock(&kvm->mmu_lock);
@@ -5483,9 +5495,13 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
-	kvm_mmu_init_tdp_mmu(kvm);
-
-	kvm->arch.memslots_have_rmaps = true;
+	if (!kvm_mmu_init_tdp_mmu(kvm))
+		/*
+		 * No smp_load/store wrappers needed here as we are in
+		 * VM init and there cannot be any memslots / other threads
+		 * accessing this struct kvm yet.
+		 */
+		kvm->arch.memslots_have_rmaps = true;
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
@@ -5508,7 +5524,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 	int i;
 	bool flush = false;
 
-	if (kvm->arch.memslots_have_rmaps) {
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps)) {
 		write_lock(&kvm->mmu_lock);
 		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
 			slots = __kvm_memslots(kvm, i);
@@ -5559,7 +5576,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 {
 	bool flush = false;
 
-	if (kvm->arch.memslots_have_rmaps) {
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps)) {
 		write_lock(&kvm->mmu_lock);
 		flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
 					  start_level, KVM_MAX_HUGEPAGE_LEVEL,
@@ -5635,7 +5653,8 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	struct kvm_memory_slot *slot = (struct kvm_memory_slot *)memslot;
 	bool flush;
 
-	if (kvm->arch.memslots_have_rmaps) {
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps)) {
 		write_lock(&kvm->mmu_lock);
 		flush = slot_handle_leaf(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
 		if (flush)
@@ -5672,7 +5691,8 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 {
 	bool flush = false;
 
-	if (kvm->arch.memslots_have_rmaps) {
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps)) {
 		write_lock(&kvm->mmu_lock);
 		flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty,
 					 false);
@@ -5705,7 +5725,8 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 	if (is_tdp_mmu_enabled(kvm))
 		kvm_tdp_mmu_zap_all(kvm);
 
-	if (!kvm->arch.memslots_have_rmaps) {
+	/* Read memslots_have_rmaps before the rmaps themselves */
+	if (!smp_load_acquire(&kvm->arch.memslots_have_rmaps)) {
 		write_unlock(&kvm->mmu_lock);
 		return;
 	}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 95eeb5ac6a8a..ea00c9502ba1 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -14,10 +14,10 @@ static bool __read_mostly tdp_mmu_enabled = false;
 module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
 
 /* Initializes the TDP MMU for the VM, if enabled. */
-void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+bool kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 {
 	if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled))
-		return;
+		return false;
 
 	/* This should not be changed for the lifetime of the VM. */
 	kvm->arch.tdp_mmu_enabled = true;
@@ -25,6 +25,8 @@ void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
 	spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
+
+	return true;
 }
 
 static __always_inline void kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 5fdf63090451..b046ab5137a1 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -80,12 +80,12 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 			 int *root_level);
 
 #ifdef CONFIG_X86_64
-void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
+bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
 #else
-static inline void kvm_mmu_init_tdp_mmu(struct kvm *kvm) {}
+static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
 static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03b6bcff2a53..fdc1b2759771 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10920,6 +10920,8 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
 		int lpages;
 		int level = i + 1;
 
+		WARN_ON(slot->arch.rmap[i]);
+
 		lpages = gfn_to_index(slot->base_gfn + npages - 1,
 				      slot->base_gfn, level) + 1;
 
@@ -10935,6 +10937,46 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
 	return 0;
 }
 
+int alloc_all_memslots_rmaps(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
+	int r = 0;
+	int i;
+
+	/*
+	 * Check memslots_have_rmaps early before acquiring the
+	 * slots_arch_lock below.
+	 */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps))
+		return 0;
+
+	mutex_lock(&kvm->slots_arch_lock);
+
+	/*
+	 * Read memslots_have_rmaps again, under the slots arch lock,
+	 * before allocating the rmaps.
+	 */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps))
+		goto out_unlock;
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot(slot, slots) {
+			r = memslot_rmap_alloc(slot, slot->npages);
+			if (r)
+				goto out_unlock;
+		}
+	}
+
+	/* Write rmap pointers before memslots_have_rmaps */
+	smp_store_release(&kvm->arch.memslots_have_rmaps, true);
+
+out_unlock:
+	mutex_unlock(&kvm->slots_arch_lock);
+	return r;
+}
+
 static int kvm_alloc_memslot_metadata(struct kvm *kvm,
 				      struct kvm_memory_slot *slot,
 				      unsigned long npages)
@@ -10949,7 +10991,8 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
 	 */
 	memset(&slot->arch, 0, sizeof(slot->arch));
 
-	if (kvm->arch.memslots_have_rmaps) {
+	/* Read memslots_have_rmaps before allocating the rmaps */
+	if (smp_load_acquire(&kvm->arch.memslots_have_rmaps)) {
 		r = memslot_rmap_alloc(slot, npages);
 		if (r)
 			return r;
-- 
2.31.1.607.g51e8a6a459-goog

