LKML Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH v2 0/7] Lazily allocate memslot rmaps
@ 2021-04-29 21:18 Ben Gardon
  2021-04-29 21:18 ` [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active Ben Gardon
                   ` (7 more replies)
  0 siblings, 8 replies; 26+ messages in thread
From: Ben Gardon @ 2021-04-29 21:18 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong, Ben Gardon

This series enables KVM to save memory when using the TDP MMU by waiting
to allocate memslot rmaps until they are needed. To do this, KVM tracks
whether or not a shadow root has been allocated. In order to get away
with not allocating the rmaps, KVM must also be sure to skip operations
which iterate over the rmaps. If the TDP MMU is in use and we have not
allocated a shadow root, these operations would essentially be op-ops
anyway. Skipping the rmap operations has a secondary benefit of avoiding
acquiring the MMU lock in write mode in many cases, substantially
reducing MMU lock contention.

This series was tested on an Intel Skylake machine. With the TDP MMU off
and on, this introduced no new failures on kvm-unit-tests or KVM selftests.

Changelog:
v2:
	Incorporated feedback from Paolo and Sean
	Replaced the memslot_assignment_lock with slots_arch_lock, which
	has a larger critical section.

Ben Gardon (7):
  KVM: x86/mmu: Track if shadow MMU active
  KVM: x86/mmu: Skip rmap operations if shadow MMU inactive
  KVM: x86/mmu: Deduplicate rmap freeing
  KVM: x86/mmu: Factor out allocating memslot rmap
  KVM: mmu: Refactor memslot copy
  KVM: mmu: Add slots_arch_lock for memslot arch fields
  KVM: x86/mmu: Lazily allocate memslot rmaps

 arch/x86/include/asm/kvm_host.h |  13 +++
 arch/x86/kvm/mmu/mmu.c          | 153 +++++++++++++++++++++-----------
 arch/x86/kvm/mmu/mmu_internal.h |   2 +
 arch/x86/kvm/mmu/tdp_mmu.c      |   6 +-
 arch/x86/kvm/mmu/tdp_mmu.h      |   4 +-
 arch/x86/kvm/x86.c              | 110 +++++++++++++++++++----
 include/linux/kvm_host.h        |   9 ++
 virt/kvm/kvm_main.c             |  54 ++++++++---
 8 files changed, 264 insertions(+), 87 deletions(-)

-- 
2.31.1.527.g47e6f16901-goog


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active
  2021-04-29 21:18 [PATCH v2 0/7] Lazily allocate memslot rmaps Ben Gardon
@ 2021-04-29 21:18 ` Ben Gardon
  2021-05-03 13:42   ` Paolo Bonzini
  2021-05-04 19:55   ` Sean Christopherson
  2021-04-29 21:18 ` [PATCH v2 2/7] KVM: x86/mmu: Skip rmap operations if shadow MMU inactive Ben Gardon
                   ` (6 subsequent siblings)
  7 siblings, 2 replies; 26+ messages in thread
From: Ben Gardon @ 2021-04-29 21:18 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong, Ben Gardon

Add a field to each VM to track if the shadow / legacy MMU is actually
in use. If the shadow MMU is not in use, then that knowledge opens the
door to other optimizations which will be added in future patches.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/mmu/mmu.c          | 10 +++++++++-
 arch/x86/kvm/mmu/mmu_internal.h |  2 ++
 arch/x86/kvm/mmu/tdp_mmu.c      |  6 ++++--
 arch/x86/kvm/mmu/tdp_mmu.h      |  4 ++--
 5 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ad22d4839bcc..3900dcf2439e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1122,6 +1122,8 @@ struct kvm_arch {
 	 */
 	spinlock_t tdp_mmu_pages_lock;
 #endif /* CONFIG_X86_64 */
+
+	bool shadow_mmu_active;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 930ac8a7e7c9..3975272321d0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3110,6 +3110,11 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	return ret;
 }
 
+void activate_shadow_mmu(struct kvm *kvm)
+{
+	kvm->arch.shadow_mmu_active = true;
+}
+
 static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 			       struct list_head *invalid_list)
 {
@@ -3280,6 +3285,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	activate_shadow_mmu(vcpu->kvm);
+
 	write_lock(&vcpu->kvm->mmu_lock);
 	r = make_mmu_pages_available(vcpu);
 	if (r < 0)
@@ -5467,7 +5474,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
-	kvm_mmu_init_tdp_mmu(kvm);
+	if (!kvm_mmu_init_tdp_mmu(kvm))
+		activate_shadow_mmu(kvm);
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index f2546d6d390c..297a911c018c 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -165,4 +165,6 @@ void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
+void activate_shadow_mmu(struct kvm *kvm);
+
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 83cbdbe5de5a..5342aca2c8e0 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -14,10 +14,10 @@ static bool __read_mostly tdp_mmu_enabled = false;
 module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
 
 /* Initializes the TDP MMU for the VM, if enabled. */
-void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+bool kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 {
 	if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled))
-		return;
+		return false;
 
 	/* This should not be changed for the lifetime of the VM. */
 	kvm->arch.tdp_mmu_enabled = true;
@@ -25,6 +25,8 @@ void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
 	spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
+
+	return true;
 }
 
 static __always_inline void kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 5fdf63090451..b046ab5137a1 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -80,12 +80,12 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 			 int *root_level);
 
 #ifdef CONFIG_X86_64
-void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
+bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
 #else
-static inline void kvm_mmu_init_tdp_mmu(struct kvm *kvm) {}
+static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
 static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
-- 
2.31.1.527.g47e6f16901-goog


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 2/7] KVM: x86/mmu: Skip rmap operations if shadow MMU inactive
  2021-04-29 21:18 [PATCH v2 0/7] Lazily allocate memslot rmaps Ben Gardon
  2021-04-29 21:18 ` [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active Ben Gardon
@ 2021-04-29 21:18 ` Ben Gardon
  2021-04-29 21:18 ` [PATCH v2 3/7] KVM: x86/mmu: Deduplicate rmap freeing Ben Gardon
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 26+ messages in thread
From: Ben Gardon @ 2021-04-29 21:18 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong, Ben Gardon

If the shadow MMU is not in use, and only the TDP MMU is being used to
manage the memory mappings for a VM, then many rmap operations can be
skipped as they are guaranteed to be no-ops. This saves some time which
would be spent on the rmap operation. It also avoids acquiring the MMU
lock in write mode for many operations.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu/mmu.c | 128 +++++++++++++++++++++++++----------------
 1 file changed, 77 insertions(+), 51 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3975272321d0..e252af46f205 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1189,6 +1189,10 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	if (is_tdp_mmu_enabled(kvm))
 		kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
 				slot->base_gfn + gfn_offset, mask, true);
+
+	if (!kvm->arch.shadow_mmu_active)
+		return;
+
 	while (mask) {
 		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
 					  PG_LEVEL_4K, slot);
@@ -1218,6 +1222,10 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 	if (is_tdp_mmu_enabled(kvm))
 		kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
 				slot->base_gfn + gfn_offset, mask, false);
+
+	if (!kvm->arch.shadow_mmu_active)
+		return;
+
 	while (mask) {
 		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
 					  PG_LEVEL_4K, slot);
@@ -1260,9 +1268,12 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 	int i;
 	bool write_protected = false;
 
-	for (i = PG_LEVEL_4K; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
-		rmap_head = __gfn_to_rmap(gfn, i, slot);
-		write_protected |= __rmap_write_protect(kvm, rmap_head, true);
+	if (kvm->arch.shadow_mmu_active) {
+		for (i = PG_LEVEL_4K; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
+			rmap_head = __gfn_to_rmap(gfn, i, slot);
+			write_protected |= __rmap_write_protect(kvm, rmap_head,
+								true);
+		}
 	}
 
 	if (is_tdp_mmu_enabled(kvm))
@@ -1433,9 +1444,10 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
 
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	bool flush;
+	bool flush = false;
 
-	flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
+	if (kvm->arch.shadow_mmu_active)
+		flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
 		flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
@@ -1445,9 +1457,10 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	bool flush;
+	bool flush = false;
 
-	flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmapp);
+	if (kvm->arch.shadow_mmu_active)
+		flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
 		flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range);
@@ -1500,9 +1513,10 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	bool young;
+	bool young = false;
 
-	young = kvm_handle_gfn_range(kvm, range, kvm_age_rmapp);
+	if (kvm->arch.shadow_mmu_active)
+		young = kvm_handle_gfn_range(kvm, range, kvm_age_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
 		young |= kvm_tdp_mmu_age_gfn_range(kvm, range);
@@ -1512,9 +1526,10 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	bool young;
+	bool young = false;
 
-	young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmapp);
+	if (kvm->arch.shadow_mmu_active)
+		young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
 		young |= kvm_tdp_mmu_test_age_gfn(kvm, range);
@@ -5447,7 +5462,8 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 	 */
 	kvm_reload_remote_mmus(kvm);
 
-	kvm_zap_obsolete_pages(kvm);
+	if (kvm->arch.shadow_mmu_active)
+		kvm_zap_obsolete_pages(kvm);
 
 	write_unlock(&kvm->mmu_lock);
 
@@ -5498,29 +5514,29 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 	int i;
 	bool flush = false;
 
-	write_lock(&kvm->mmu_lock);
-	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-		slots = __kvm_memslots(kvm, i);
-		kvm_for_each_memslot(memslot, slots) {
-			gfn_t start, end;
-
-			start = max(gfn_start, memslot->base_gfn);
-			end = min(gfn_end, memslot->base_gfn + memslot->npages);
-			if (start >= end)
-				continue;
+	if (kvm->arch.shadow_mmu_active) {
+		write_lock(&kvm->mmu_lock);
+		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+			slots = __kvm_memslots(kvm, i);
+			kvm_for_each_memslot(memslot, slots) {
+				gfn_t start, end;
+
+				start = max(gfn_start, memslot->base_gfn);
+				end = min(gfn_end, memslot->base_gfn + memslot->npages);
+				if (start >= end)
+					continue;
 
-			flush = slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
-							PG_LEVEL_4K,
-							KVM_MAX_HUGEPAGE_LEVEL,
-							start, end - 1, true, flush);
+				flush = slot_handle_level_range(kvm, memslot,
+						kvm_zap_rmapp, PG_LEVEL_4K,
+						KVM_MAX_HUGEPAGE_LEVEL, start,
+						end - 1, true, flush);
+			}
 		}
+		if (flush)
+			kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end);
+		write_unlock(&kvm->mmu_lock);
 	}
 
-	if (flush)
-		kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end);
-
-	write_unlock(&kvm->mmu_lock);
-
 	if (is_tdp_mmu_enabled(kvm)) {
 		flush = false;
 
@@ -5547,12 +5563,15 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 				      struct kvm_memory_slot *memslot,
 				      int start_level)
 {
-	bool flush;
+	bool flush = false;
 
-	write_lock(&kvm->mmu_lock);
-	flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
-				start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
-	write_unlock(&kvm->mmu_lock);
+	if (kvm->arch.shadow_mmu_active) {
+		write_lock(&kvm->mmu_lock);
+		flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
+					  start_level, KVM_MAX_HUGEPAGE_LEVEL,
+					  false);
+		write_unlock(&kvm->mmu_lock);
+	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
@@ -5622,16 +5641,15 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	struct kvm_memory_slot *slot = (struct kvm_memory_slot *)memslot;
 	bool flush;
 
-	write_lock(&kvm->mmu_lock);
-	flush = slot_handle_leaf(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
-
-	if (flush)
-		kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
-	write_unlock(&kvm->mmu_lock);
+	if (kvm->arch.shadow_mmu_active) {
+		write_lock(&kvm->mmu_lock);
+		flush = slot_handle_leaf(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
+		if (flush)
+			kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+		write_unlock(&kvm->mmu_lock);
+	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
-		flush = false;
-
 		read_lock(&kvm->mmu_lock);
 		flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
 		if (flush)
@@ -5658,11 +5676,14 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot)
 {
-	bool flush;
+	bool flush = false;
 
-	write_lock(&kvm->mmu_lock);
-	flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
-	write_unlock(&kvm->mmu_lock);
+	if (kvm->arch.shadow_mmu_active) {
+		write_lock(&kvm->mmu_lock);
+		flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty,
+					 false);
+		write_unlock(&kvm->mmu_lock);
+	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
@@ -5687,6 +5708,14 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 	int ign;
 
 	write_lock(&kvm->mmu_lock);
+	if (is_tdp_mmu_enabled(kvm))
+		kvm_tdp_mmu_zap_all(kvm);
+
+	if (!kvm->arch.shadow_mmu_active) {
+		write_unlock(&kvm->mmu_lock);
+		return;
+	}
+
 restart:
 	list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
 		if (WARN_ON(sp->role.invalid))
@@ -5699,9 +5728,6 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 
 	kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
-	if (is_tdp_mmu_enabled(kvm))
-		kvm_tdp_mmu_zap_all(kvm);
-
 	write_unlock(&kvm->mmu_lock);
 }
 
-- 
2.31.1.527.g47e6f16901-goog


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 3/7] KVM: x86/mmu: Deduplicate rmap freeing
  2021-04-29 21:18 [PATCH v2 0/7] Lazily allocate memslot rmaps Ben Gardon
  2021-04-29 21:18 ` [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active Ben Gardon
  2021-04-29 21:18 ` [PATCH v2 2/7] KVM: x86/mmu: Skip rmap operations if shadow MMU inactive Ben Gardon
@ 2021-04-29 21:18 ` Ben Gardon
  2021-04-29 21:18 ` [PATCH v2 4/7] KVM: x86/mmu: Factor out allocating memslot rmap Ben Gardon
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 26+ messages in thread
From: Ben Gardon @ 2021-04-29 21:18 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong, Ben Gardon

Small code deduplication. No functional change expected.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/x86.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cf3b67679cf0..5bcf07465c47 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10818,17 +10818,23 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_hv_destroy_vm(kvm);
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+static void free_memslot_rmap(struct kvm_memory_slot *slot)
 {
 	int i;
 
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		kvfree(slot->arch.rmap[i]);
 		slot->arch.rmap[i] = NULL;
+	}
+}
 
-		if (i == 0)
-			continue;
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	int i;
+
+	free_memslot_rmap(slot);
 
+	for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
 		kvfree(slot->arch.lpage_info[i - 1]);
 		slot->arch.lpage_info[i - 1] = NULL;
 	}
@@ -10894,12 +10900,9 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 	return 0;
 
 out_free:
-	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
-		kvfree(slot->arch.rmap[i]);
-		slot->arch.rmap[i] = NULL;
-		if (i == 0)
-			continue;
+	free_memslot_rmap(slot);
 
+	for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
 		kvfree(slot->arch.lpage_info[i - 1]);
 		slot->arch.lpage_info[i - 1] = NULL;
 	}
-- 
2.31.1.527.g47e6f16901-goog


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 4/7] KVM: x86/mmu: Factor out allocating memslot rmap
  2021-04-29 21:18 [PATCH v2 0/7] Lazily allocate memslot rmaps Ben Gardon
                   ` (2 preceding siblings ...)
  2021-04-29 21:18 ` [PATCH v2 3/7] KVM: x86/mmu: Deduplicate rmap freeing Ben Gardon
@ 2021-04-29 21:18 ` Ben Gardon
  2021-04-29 21:18 ` [PATCH v2 5/7] KVM: mmu: Refactor memslot copy Ben Gardon
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 26+ messages in thread
From: Ben Gardon @ 2021-04-29 21:18 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong, Ben Gardon

Small refactor to facilitate allocating rmaps for all memslots at once.

No functional change expected.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/x86.c | 41 ++++++++++++++++++++++++++++++++---------
 1 file changed, 32 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5bcf07465c47..fc32a7dbe4c4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10842,10 +10842,37 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 	kvm_page_track_free_memslot(slot);
 }
 
+static int alloc_memslot_rmap(struct kvm_memory_slot *slot,
+			      unsigned long npages)
+{
+	int i;
+
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		int lpages;
+		int level = i + 1;
+
+		lpages = gfn_to_index(slot->base_gfn + npages - 1,
+				      slot->base_gfn, level) + 1;
+
+		slot->arch.rmap[i] =
+			kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
+				 GFP_KERNEL_ACCOUNT);
+		if (!slot->arch.rmap[i])
+			goto out_free;
+	}
+
+	return 0;
+
+out_free:
+	free_memslot_rmap(slot);
+	return -ENOMEM;
+}
+
 static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 				      unsigned long npages)
 {
 	int i;
+	int r;
 
 	/*
 	 * Clear out the previous array pointers for the KVM_MR_MOVE case.  The
@@ -10854,7 +10881,11 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 	 */
 	memset(&slot->arch, 0, sizeof(slot->arch));
 
-	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+	r = alloc_memslot_rmap(slot, npages);
+	if (r)
+		return r;
+
+	for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
 		struct kvm_lpage_info *linfo;
 		unsigned long ugfn;
 		int lpages;
@@ -10863,14 +10894,6 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 		lpages = gfn_to_index(slot->base_gfn + npages - 1,
 				      slot->base_gfn, level) + 1;
 
-		slot->arch.rmap[i] =
-			kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
-				 GFP_KERNEL_ACCOUNT);
-		if (!slot->arch.rmap[i])
-			goto out_free;
-		if (i == 0)
-			continue;
-
 		linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
 		if (!linfo)
 			goto out_free;
-- 
2.31.1.527.g47e6f16901-goog


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 5/7] KVM: mmu: Refactor memslot copy
  2021-04-29 21:18 [PATCH v2 0/7] Lazily allocate memslot rmaps Ben Gardon
                   ` (3 preceding siblings ...)
  2021-04-29 21:18 ` [PATCH v2 4/7] KVM: x86/mmu: Factor out allocating memslot rmap Ben Gardon
@ 2021-04-29 21:18 ` Ben Gardon
  2021-04-29 21:18 ` [PATCH v2 6/7] KVM: mmu: Add slots_arch_lock for memslot arch fields Ben Gardon
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 26+ messages in thread
From: Ben Gardon @ 2021-04-29 21:18 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong, Ben Gardon

Factor out copying kvm_memslots from allocating the memory for new ones
in preparation for adding a new lock to protect the arch-specific fields
of the memslots.

No functional change intended.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 virt/kvm/kvm_main.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2799c6660cce..c8010f55e368 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1306,6 +1306,18 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	return old_memslots;
 }
 
+static size_t kvm_memslots_size(int slots)
+{
+	return sizeof(struct kvm_memslots) +
+	       (sizeof(struct kvm_memory_slot) * slots);
+}
+
+static void kvm_copy_memslots(struct kvm_memslots *from,
+			      struct kvm_memslots *to)
+{
+	memcpy(to, from, kvm_memslots_size(from->used_slots));
+}
+
 /*
  * Note, at a minimum, the current number of used slots must be allocated, even
  * when deleting a memslot, as we need a complete duplicate of the memslots for
@@ -1315,19 +1327,16 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
 					     enum kvm_mr_change change)
 {
 	struct kvm_memslots *slots;
-	size_t old_size, new_size;
-
-	old_size = sizeof(struct kvm_memslots) +
-		   (sizeof(struct kvm_memory_slot) * old->used_slots);
+	size_t new_size;
 
 	if (change == KVM_MR_CREATE)
-		new_size = old_size + sizeof(struct kvm_memory_slot);
+		new_size = kvm_memslots_size(old->used_slots + 1);
 	else
-		new_size = old_size;
+		new_size = kvm_memslots_size(old->used_slots);
 
 	slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
 	if (likely(slots))
-		memcpy(slots, old, old_size);
+		kvm_copy_memslots(old, slots);
 
 	return slots;
 }
-- 
2.31.1.527.g47e6f16901-goog


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 6/7] KVM: mmu: Add slots_arch_lock for memslot arch fields
  2021-04-29 21:18 [PATCH v2 0/7] Lazily allocate memslot rmaps Ben Gardon
                   ` (4 preceding siblings ...)
  2021-04-29 21:18 ` [PATCH v2 5/7] KVM: mmu: Refactor memslot copy Ben Gardon
@ 2021-04-29 21:18 ` Ben Gardon
  2021-05-03 13:29   ` Paolo Bonzini
  2021-04-29 21:18 ` [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps Ben Gardon
  2021-05-03 13:44 ` [PATCH v2 0/7] " Paolo Bonzini
  7 siblings, 1 reply; 26+ messages in thread
From: Ben Gardon @ 2021-04-29 21:18 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong, Ben Gardon

Add a new lock to protect the arch-specific fields of memslots if they
need to be modified in a kvm->srcu read critical section. A future
commit will use this lock to lazily allocate memslot rmaps for x86.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 include/linux/kvm_host.h |  9 +++++++++
 virt/kvm/kvm_main.c      | 31 ++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8895b95b6a22..2d5e797fbb08 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -472,6 +472,15 @@ struct kvm {
 #endif /* KVM_HAVE_MMU_RWLOCK */
 
 	struct mutex slots_lock;
+
+	/*
+	 * Protects the arch-specific fields of struct kvm_memory_slots in
+	 * use by the VM. To be used under the slots_lock (above) or in a
+	 * kvm->srcu read cirtical section where acquiring the slots_lock
+	 * would lead to deadlock with the synchronize_srcu in
+	 * install_new_memslots.
+	 */
+	struct mutex slots_arch_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c8010f55e368..97b03fa2d0c8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -908,6 +908,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
+	mutex_init(&kvm->slots_arch_lock);
 	INIT_LIST_HEAD(&kvm->devices);
 
 	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
@@ -1280,6 +1281,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
 
 	rcu_assign_pointer(kvm->memslots[as_id], slots);
+
+	/* Acquired in kvm_set_memslot. */
+	mutex_unlock(&kvm->slots_arch_lock);
+
 	synchronize_srcu_expedited(&kvm->srcu);
 
 	/*
@@ -1351,6 +1356,9 @@ static int kvm_set_memslot(struct kvm *kvm,
 	struct kvm_memslots *slots;
 	int r;
 
+	/* Released in install_new_memslots. */
+	mutex_lock(&kvm->slots_arch_lock);
+
 	slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change);
 	if (!slots)
 		return -ENOMEM;
@@ -1364,10 +1372,9 @@ static int kvm_set_memslot(struct kvm *kvm,
 		slot->flags |= KVM_MEMSLOT_INVALID;
 
 		/*
-		 * We can re-use the old memslots, the only difference from the
-		 * newly installed memslots is the invalid flag, which will get
-		 * dropped by update_memslots anyway.  We'll also revert to the
-		 * old memslots if preparing the new memory region fails.
+		 * We can re-use the memory from the old memslots.
+		 * It will be overwritten with a copy of the new memslots
+		 * after reacquiring the slots_arch_lock below.
 		 */
 		slots = install_new_memslots(kvm, as_id, slots);
 
@@ -1379,6 +1386,17 @@ static int kvm_set_memslot(struct kvm *kvm,
 		 *	- kvm_is_visible_gfn (mmu_check_root)
 		 */
 		kvm_arch_flush_shadow_memslot(kvm, slot);
+
+		/* Released in install_new_memslots. */
+		mutex_lock(&kvm->slots_arch_lock);
+
+		/*
+		 * The arch-specific fields of the memslots could have changed
+		 * between releasing the slots_arch_lock in
+		 * install_new_memslots and here, so get a fresh copy of the
+		 * slots.
+		 */
+		kvm_copy_memslots(__kvm_memslots(kvm, as_id), slots);
 	}
 
 	r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
@@ -1394,8 +1412,11 @@ static int kvm_set_memslot(struct kvm *kvm,
 	return 0;
 
 out_slots:
-	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE)
+	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+		slot = id_to_memslot(slots, old->id);
+		slot->flags &= ~KVM_MEMSLOT_INVALID;
 		slots = install_new_memslots(kvm, as_id, slots);
+	}
 	kvfree(slots);
 	return r;
 }
-- 
2.31.1.527.g47e6f16901-goog


^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps
  2021-04-29 21:18 [PATCH v2 0/7] Lazily allocate memslot rmaps Ben Gardon
                   ` (5 preceding siblings ...)
  2021-04-29 21:18 ` [PATCH v2 6/7] KVM: mmu: Add slots_arch_lock for memslot arch fields Ben Gardon
@ 2021-04-29 21:18 ` Ben Gardon
  2021-05-03 13:42   ` Paolo Bonzini
                     ` (2 more replies)
  2021-05-03 13:44 ` [PATCH v2 0/7] " Paolo Bonzini
  7 siblings, 3 replies; 26+ messages in thread
From: Ben Gardon @ 2021-04-29 21:18 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong, Ben Gardon

If the TDP MMU is in use, wait to allocate the rmaps until the shadow
MMU is actually used. (i.e. a nested VM is launched.) This saves memory
equal to 0.2% of guest memory in cases where the TDP MMU is used and
there are no nested guests involved.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/include/asm/kvm_host.h | 11 +++++++
 arch/x86/kvm/mmu/mmu.c          | 21 +++++++++++--
 arch/x86/kvm/mmu/mmu_internal.h |  2 +-
 arch/x86/kvm/x86.c              | 54 ++++++++++++++++++++++++++++++---
 4 files changed, 80 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3900dcf2439e..b8633ed00a6a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1124,6 +1124,15 @@ struct kvm_arch {
 #endif /* CONFIG_X86_64 */
 
 	bool shadow_mmu_active;
+
+	/*
+	 * If set, the rmap should be allocated for any newly created or
+	 * modified memslots. If allocating rmaps lazily, this may be set
+	 * before the rmaps are allocated for existing memslots, but
+	 * shadow_mmu_active will not be set until after the rmaps are fully
+	 * allocated.
+	 */
+	bool alloc_memslot_rmaps;
 };
 
 struct kvm_vm_stat {
@@ -1855,4 +1864,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 
 int kvm_cpu_dirty_log_size(void);
 
+int alloc_all_memslots_rmaps(struct kvm *kvm);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e252af46f205..b2a6585bd978 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3125,9 +3125,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 	return ret;
 }
 
-void activate_shadow_mmu(struct kvm *kvm)
+int activate_shadow_mmu(struct kvm *kvm)
 {
+	int r;
+
+	r = alloc_all_memslots_rmaps(kvm);
+	if (r)
+		return r;
+
 	kvm->arch.shadow_mmu_active = true;
+
+	return 0;
 }
 
 static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@@ -3300,7 +3308,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	activate_shadow_mmu(vcpu->kvm);
+	r = activate_shadow_mmu(vcpu->kvm);
+	if (r)
+		return r;
 
 	write_lock(&vcpu->kvm->mmu_lock);
 	r = make_mmu_pages_available(vcpu);
@@ -5491,7 +5501,12 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
 	if (!kvm_mmu_init_tdp_mmu(kvm))
-		activate_shadow_mmu(kvm);
+		/*
+		 * No memslots can have been allocated at this point.
+		 * activate_shadow_mmu won't actually need to allocate
+		 * rmaps, so it cannot fail.
+		 */
+		WARN_ON(activate_shadow_mmu(kvm));
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 297a911c018c..c6b21a916452 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -165,6 +165,6 @@ void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
-void activate_shadow_mmu(struct kvm *kvm);
+int activate_shadow_mmu(struct kvm *kvm);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc32a7dbe4c4..c72b35cbaef7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10842,11 +10842,24 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 	kvm_page_track_free_memslot(slot);
 }
 
-static int alloc_memslot_rmap(struct kvm_memory_slot *slot,
+static int alloc_memslot_rmap(struct kvm *kvm, struct kvm_memory_slot *slot,
 			      unsigned long npages)
 {
 	int i;
 
+	if (!kvm->arch.alloc_memslot_rmaps)
+		return 0;
+
+	/*
+	 * All rmaps for a memslot should be allocated either before
+	 * the memslot is installed (in which case no other threads
+	 * should have a pointer to it), or under the
+	 * slots_arch_lock. Avoid overwriting already allocated
+	 * rmaps.
+	 */
+	if (slot->arch.rmap[0])
+		return 0;
+
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		int lpages;
 		int level = i + 1;
@@ -10868,7 +10881,40 @@ static int alloc_memslot_rmap(struct kvm_memory_slot *slot,
 	return -ENOMEM;
 }
 
-static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
+int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)
+{
+	struct kvm_memory_slot *slot;
+	int r = 0;
+
+	kvm_for_each_memslot(slot, slots) {
+		r = alloc_memslot_rmap(kvm, slot, slot->npages);
+		if (r)
+			break;
+	}
+	return r;
+}
+
+int alloc_all_memslots_rmaps(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	int r = 0;
+	int i;
+
+	mutex_lock(&kvm->slots_arch_lock);
+	kvm->arch.alloc_memslot_rmaps = true;
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		r = alloc_memslots_rmaps(kvm, slots);
+		if (r)
+			break;
+	}
+	mutex_unlock(&kvm->slots_arch_lock);
+	return r;
+}
+
+static int kvm_alloc_memslot_metadata(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
 				      unsigned long npages)
 {
 	int i;
@@ -10881,7 +10927,7 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 	 */
 	memset(&slot->arch, 0, sizeof(slot->arch));
 
-	r = alloc_memslot_rmap(slot, npages);
+	r = alloc_memslot_rmap(kvm, slot, npages);
 	if (r)
 		return r;
 
@@ -10954,7 +11000,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				enum kvm_mr_change change)
 {
 	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
-		return kvm_alloc_memslot_metadata(memslot,
+		return kvm_alloc_memslot_metadata(kvm, memslot,
 						  mem->memory_size >> PAGE_SHIFT);
 	return 0;
 }
-- 
2.31.1.527.g47e6f16901-goog


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 6/7] KVM: mmu: Add slots_arch_lock for memslot arch fields
  2021-04-29 21:18 ` [PATCH v2 6/7] KVM: mmu: Add slots_arch_lock for memslot arch fields Ben Gardon
@ 2021-05-03 13:29   ` Paolo Bonzini
  0 siblings, 0 replies; 26+ messages in thread
From: Paolo Bonzini @ 2021-05-03 13:29 UTC (permalink / raw)
  To: Ben Gardon, linux-kernel, kvm
  Cc: Peter Xu, Sean Christopherson, Peter Shier, Junaid Shahid,
	Jim Mattson, Yulei Zhang, Wanpeng Li, Vitaly Kuznetsov,
	Xiao Guangrong

On 29/04/21 23:18, Ben Gardon wrote:
> Add a new lock to protect the arch-specific fields of memslots if they
> need to be modified in a kvm->srcu read critical section. A future
> commit will use this lock to lazily allocate memslot rmaps for x86.

Here there should be a blurb about the possible races that can happen 
and why we decided for the slots_arch_lock.

> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
>   include/linux/kvm_host.h |  9 +++++++++
>   virt/kvm/kvm_main.c      | 31 ++++++++++++++++++++++++++-----
>   2 files changed, 35 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 8895b95b6a22..2d5e797fbb08 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -472,6 +472,15 @@ struct kvm {
>   #endif /* KVM_HAVE_MMU_RWLOCK */
>   
>   	struct mutex slots_lock;
> +
> +	/*
> +	 * Protects the arch-specific fields of struct kvm_memory_slots in
> +	 * use by the VM. To be used under the slots_lock (above) or in a
> +	 * kvm->srcu read cirtical section where acquiring the slots_lock
> +	 * would lead to deadlock with the synchronize_srcu in
> +	 * install_new_memslots.
> +	 */

I think usage under slots_lock need not be mentioned here.  More like this:

	/*
	 * Protects the arch-specific fields of struct kvm_memory_slots
	 * in use by the VM.  Usually these are initialized by
	 * kvm_arch_prepare_memory_region and then protected by
	 * kvm->srcu; however, if they need to be initialized outside
	 * kvm_arch_prepare_memory_region, slots_arch_lock can
	 * be used instead as it is also held when calling
	 * kvm_arch_prepare_memory_region itself.  Note that using
	 * slots_lock would lead to deadlock with install_new_memslots,
	 * because it is held during synchronize_srcu:
	 *
	 *	idx = srcu_read_lock(&kvm->srcu);
	 *	mutex_lock(&kvm->slots_lock);
	 *				mutex_lock(&kvm->slots_lock);
	 *				synchronize_srcu(&kvm->srcu);
	 */

(Though a better place for this is in 
Documentation/virtual/kvm/locking.rst).

Paolo

> +	struct mutex slots_arch_lock;
>   	struct mm_struct *mm; /* userspace tied to this vm */
>   	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
>   	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index c8010f55e368..97b03fa2d0c8 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -908,6 +908,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
>   	mutex_init(&kvm->lock);
>   	mutex_init(&kvm->irq_lock);
>   	mutex_init(&kvm->slots_lock);
> +	mutex_init(&kvm->slots_arch_lock);
>   	INIT_LIST_HEAD(&kvm->devices);
>   
>   	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
> @@ -1280,6 +1281,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
>   	slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
>   
>   	rcu_assign_pointer(kvm->memslots[as_id], slots);
> +
> +	/* Acquired in kvm_set_memslot. */
> +	mutex_unlock(&kvm->slots_arch_lock);
> +
>   	synchronize_srcu_expedited(&kvm->srcu);
>   
>   	/*
> @@ -1351,6 +1356,9 @@ static int kvm_set_memslot(struct kvm *kvm,
>   	struct kvm_memslots *slots;
>   	int r;
>   
> +	/* Released in install_new_memslots. */
> +	mutex_lock(&kvm->slots_arch_lock);
> +
>   	slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change);
>   	if (!slots)
>   		return -ENOMEM;
> @@ -1364,10 +1372,9 @@ static int kvm_set_memslot(struct kvm *kvm,
>   		slot->flags |= KVM_MEMSLOT_INVALID;
>   
>   		/*
> -		 * We can re-use the old memslots, the only difference from the
> -		 * newly installed memslots is the invalid flag, which will get
> -		 * dropped by update_memslots anyway.  We'll also revert to the
> -		 * old memslots if preparing the new memory region fails.
> +		 * We can re-use the memory from the old memslots.
> +		 * It will be overwritten with a copy of the new memslots
> +		 * after reacquiring the slots_arch_lock below.
>   		 */
>   		slots = install_new_memslots(kvm, as_id, slots);
>   
> @@ -1379,6 +1386,17 @@ static int kvm_set_memslot(struct kvm *kvm,
>   		 *	- kvm_is_visible_gfn (mmu_check_root)
>   		 */
>   		kvm_arch_flush_shadow_memslot(kvm, slot);
> +
> +		/* Released in install_new_memslots. */
> +		mutex_lock(&kvm->slots_arch_lock);
> +
> +		/*
> +		 * The arch-specific fields of the memslots could have changed
> +		 * between releasing the slots_arch_lock in
> +		 * install_new_memslots and here, so get a fresh copy of the
> +		 * slots.
> +		 */
> +		kvm_copy_memslots(__kvm_memslots(kvm, as_id), slots);
>   	}
>   
>   	r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
> @@ -1394,8 +1412,11 @@ static int kvm_set_memslot(struct kvm *kvm,
>   	return 0;
>   
>   out_slots:
> -	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE)
> +	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
> +		slot = id_to_memslot(slots, old->id);
> +		slot->flags &= ~KVM_MEMSLOT_INVALID;
>   		slots = install_new_memslots(kvm, as_id, slots);
> +	}
>   	kvfree(slots);
>   	return r;
>   }
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active
  2021-04-29 21:18 ` [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active Ben Gardon
@ 2021-05-03 13:42   ` Paolo Bonzini
  2021-05-04 17:26     ` Ben Gardon
  2021-05-04 19:55   ` Sean Christopherson
  1 sibling, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2021-05-03 13:42 UTC (permalink / raw)
  To: Ben Gardon, linux-kernel, kvm
  Cc: Peter Xu, Sean Christopherson, Peter Shier, Junaid Shahid,
	Jim Mattson, Yulei Zhang, Wanpeng Li, Vitaly Kuznetsov,
	Xiao Guangrong

On 29/04/21 23:18, Ben Gardon wrote:
> +void activate_shadow_mmu(struct kvm *kvm)
> +{
> +	kvm->arch.shadow_mmu_active = true;
> +}
> +

I think there's no lock protecting both the write and the read side.
Therefore this should be an smp_store_release, and all checks in
patch 2 should be an smp_load_acquire.

Also, the assignments to slot->arch.rmap in patch 4 (alloc_memslot_rmap)
should be an rcu_assign_pointer, while __gfn_to_rmap must be changed like so:

+	struct kvm_rmap_head *head;
...
-	return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
+	head = srcu_dereference(slot->arch.rmap[level - PG_LEVEL_4K], &kvm->srcu,
+				 lockdep_is_held(&kvm->slots_arch_lock));
+       return &head[idx];

Paolo


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps
  2021-04-29 21:18 ` [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps Ben Gardon
@ 2021-05-03 13:42   ` Paolo Bonzini
  2021-05-03 17:29     ` Ben Gardon
  2021-05-04 20:13   ` Sean Christopherson
  2021-05-04 20:22   ` Paolo Bonzini
  2 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2021-05-03 13:42 UTC (permalink / raw)
  To: Ben Gardon, linux-kernel, kvm
  Cc: Peter Xu, Sean Christopherson, Peter Shier, Junaid Shahid,
	Jim Mattson, Yulei Zhang, Wanpeng Li, Vitaly Kuznetsov,
	Xiao Guangrong

On 29/04/21 23:18, Ben Gardon wrote:
> +int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)

This can be static, can't it?

Paolo

> +{
> +	struct kvm_memory_slot *slot;
> +	int r = 0;
> +
> +	kvm_for_each_memslot(slot, slots) {
> +		r = alloc_memslot_rmap(kvm, slot, slot->npages);
> +		if (r)
> +			break;
> +	}
> +	return r;
> +}
> +


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/7] Lazily allocate memslot rmaps
  2021-04-29 21:18 [PATCH v2 0/7] Lazily allocate memslot rmaps Ben Gardon
                   ` (6 preceding siblings ...)
  2021-04-29 21:18 ` [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps Ben Gardon
@ 2021-05-03 13:44 ` Paolo Bonzini
  2021-05-03 17:31   ` Ben Gardon
  7 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2021-05-03 13:44 UTC (permalink / raw)
  To: Ben Gardon, linux-kernel, kvm
  Cc: Peter Xu, Sean Christopherson, Peter Shier, Junaid Shahid,
	Jim Mattson, Yulei Zhang, Wanpeng Li, Vitaly Kuznetsov,
	Xiao Guangrong

On 29/04/21 23:18, Ben Gardon wrote:
> This series enables KVM to save memory when using the TDP MMU by waiting
> to allocate memslot rmaps until they are needed. To do this, KVM tracks
> whether or not a shadow root has been allocated. In order to get away
> with not allocating the rmaps, KVM must also be sure to skip operations
> which iterate over the rmaps. If the TDP MMU is in use and we have not
> allocated a shadow root, these operations would essentially be op-ops
> anyway. Skipping the rmap operations has a secondary benefit of avoiding
> acquiring the MMU lock in write mode in many cases, substantially
> reducing MMU lock contention.
> 
> This series was tested on an Intel Skylake machine. With the TDP MMU off
> and on, this introduced no new failures on kvm-unit-tests or KVM selftests.

Thanks, I only reported some technicalities in the ordering of loads 
(which matter since the loads happen with SRCU protection only).  Apart 
from this, this looks fine!

Paolo

> Changelog:
> v2:
> 	Incorporated feedback from Paolo and Sean
> 	Replaced the memslot_assignment_lock with slots_arch_lock, which
> 	has a larger critical section.
> 
> Ben Gardon (7):
>    KVM: x86/mmu: Track if shadow MMU active
>    KVM: x86/mmu: Skip rmap operations if shadow MMU inactive
>    KVM: x86/mmu: Deduplicate rmap freeing
>    KVM: x86/mmu: Factor out allocating memslot rmap
>    KVM: mmu: Refactor memslot copy
>    KVM: mmu: Add slots_arch_lock for memslot arch fields
>    KVM: x86/mmu: Lazily allocate memslot rmaps
> 
>   arch/x86/include/asm/kvm_host.h |  13 +++
>   arch/x86/kvm/mmu/mmu.c          | 153 +++++++++++++++++++++-----------
>   arch/x86/kvm/mmu/mmu_internal.h |   2 +
>   arch/x86/kvm/mmu/tdp_mmu.c      |   6 +-
>   arch/x86/kvm/mmu/tdp_mmu.h      |   4 +-
>   arch/x86/kvm/x86.c              | 110 +++++++++++++++++++----
>   include/linux/kvm_host.h        |   9 ++
>   virt/kvm/kvm_main.c             |  54 ++++++++---
>   8 files changed, 264 insertions(+), 87 deletions(-)
> 


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps
  2021-05-03 13:42   ` Paolo Bonzini
@ 2021-05-03 17:29     ` Ben Gardon
  0 siblings, 0 replies; 26+ messages in thread
From: Ben Gardon @ 2021-05-03 17:29 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: LKML, kvm, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On Mon, May 3, 2021 at 6:42 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 29/04/21 23:18, Ben Gardon wrote:
> > +int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)
>
> This can be static, can't it?

Ah, yes. Absolutely.

>
> Paolo
>
> > +{
> > +     struct kvm_memory_slot *slot;
> > +     int r = 0;
> > +
> > +     kvm_for_each_memslot(slot, slots) {
> > +             r = alloc_memslot_rmap(kvm, slot, slot->npages);
> > +             if (r)
> > +                     break;
> > +     }
> > +     return r;
> > +}
> > +
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/7] Lazily allocate memslot rmaps
  2021-05-03 13:44 ` [PATCH v2 0/7] " Paolo Bonzini
@ 2021-05-03 17:31   ` Ben Gardon
  2021-05-04  7:21     ` Paolo Bonzini
  0 siblings, 1 reply; 26+ messages in thread
From: Ben Gardon @ 2021-05-03 17:31 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: LKML, kvm, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On Mon, May 3, 2021 at 6:45 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 29/04/21 23:18, Ben Gardon wrote:
> > This series enables KVM to save memory when using the TDP MMU by waiting
> > to allocate memslot rmaps until they are needed. To do this, KVM tracks
> > whether or not a shadow root has been allocated. In order to get away
> > with not allocating the rmaps, KVM must also be sure to skip operations
> > which iterate over the rmaps. If the TDP MMU is in use and we have not
> > allocated a shadow root, these operations would essentially be op-ops
> > anyway. Skipping the rmap operations has a secondary benefit of avoiding
> > acquiring the MMU lock in write mode in many cases, substantially
> > reducing MMU lock contention.
> >
> > This series was tested on an Intel Skylake machine. With the TDP MMU off
> > and on, this introduced no new failures on kvm-unit-tests or KVM selftests.
>
> Thanks, I only reported some technicalities in the ordering of loads
> (which matter since the loads happen with SRCU protection only).  Apart
> from this, this looks fine!

Awesome to hear, thank you for the reviews. Should I send a v3
addressing those comments, or did you already make those changes when
applying to your tree?

>
> Paolo
>
> > Changelog:
> > v2:
> >       Incorporated feedback from Paolo and Sean
> >       Replaced the memslot_assignment_lock with slots_arch_lock, which
> >       has a larger critical section.
> >
> > Ben Gardon (7):
> >    KVM: x86/mmu: Track if shadow MMU active
> >    KVM: x86/mmu: Skip rmap operations if shadow MMU inactive
> >    KVM: x86/mmu: Deduplicate rmap freeing
> >    KVM: x86/mmu: Factor out allocating memslot rmap
> >    KVM: mmu: Refactor memslot copy
> >    KVM: mmu: Add slots_arch_lock for memslot arch fields
> >    KVM: x86/mmu: Lazily allocate memslot rmaps
> >
> >   arch/x86/include/asm/kvm_host.h |  13 +++
> >   arch/x86/kvm/mmu/mmu.c          | 153 +++++++++++++++++++++-----------
> >   arch/x86/kvm/mmu/mmu_internal.h |   2 +
> >   arch/x86/kvm/mmu/tdp_mmu.c      |   6 +-
> >   arch/x86/kvm/mmu/tdp_mmu.h      |   4 +-
> >   arch/x86/kvm/x86.c              | 110 +++++++++++++++++++----
> >   include/linux/kvm_host.h        |   9 ++
> >   virt/kvm/kvm_main.c             |  54 ++++++++---
> >   8 files changed, 264 insertions(+), 87 deletions(-)
> >
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/7] Lazily allocate memslot rmaps
  2021-05-03 17:31   ` Ben Gardon
@ 2021-05-04  7:21     ` Paolo Bonzini
  2021-05-04 17:28       ` Ben Gardon
  0 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2021-05-04  7:21 UTC (permalink / raw)
  To: Ben Gardon
  Cc: LKML, kvm, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On 03/05/21 19:31, Ben Gardon wrote:
> On Mon, May 3, 2021 at 6:45 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>> On 29/04/21 23:18, Ben Gardon wrote:
>>> This series enables KVM to save memory when using the TDP MMU by waiting
>>> to allocate memslot rmaps until they are needed. To do this, KVM tracks
>>> whether or not a shadow root has been allocated. In order to get away
>>> with not allocating the rmaps, KVM must also be sure to skip operations
>>> which iterate over the rmaps. If the TDP MMU is in use and we have not
>>> allocated a shadow root, these operations would essentially be op-ops
>>> anyway. Skipping the rmap operations has a secondary benefit of avoiding
>>> acquiring the MMU lock in write mode in many cases, substantially
>>> reducing MMU lock contention.
>>>
>>> This series was tested on an Intel Skylake machine. With the TDP MMU off
>>> and on, this introduced no new failures on kvm-unit-tests or KVM selftests.
>>
>> Thanks, I only reported some technicalities in the ordering of loads
>> (which matter since the loads happen with SRCU protection only).  Apart
>> from this, this looks fine!
> 
> Awesome to hear, thank you for the reviews. Should I send a v3
> addressing those comments, or did you already make those changes when
> applying to your tree?

No, I didn't (I wanted some oversight, and this is 5.14 stuff anyway).

Paolo


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active
  2021-05-03 13:42   ` Paolo Bonzini
@ 2021-05-04 17:26     ` Ben Gardon
  2021-05-04 20:18       ` Paolo Bonzini
  0 siblings, 1 reply; 26+ messages in thread
From: Ben Gardon @ 2021-05-04 17:26 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: LKML, kvm, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On Mon, May 3, 2021 at 6:42 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 29/04/21 23:18, Ben Gardon wrote:
> > +void activate_shadow_mmu(struct kvm *kvm)
> > +{
> > +     kvm->arch.shadow_mmu_active = true;
> > +}
> > +
>
> I think there's no lock protecting both the write and the read side.
> Therefore this should be an smp_store_release, and all checks in
> patch 2 should be an smp_load_acquire.

That makes sense.

>
> Also, the assignments to slot->arch.rmap in patch 4 (alloc_memslot_rmap)
> should be an rcu_assign_pointer, while __gfn_to_rmap must be changed like so:
>
> +       struct kvm_rmap_head *head;
> ...
> -       return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
> +       head = srcu_dereference(slot->arch.rmap[level - PG_LEVEL_4K], &kvm->srcu,
> +                                lockdep_is_held(&kvm->slots_arch_lock));
> +       return &head[idx];

I'm not sure I fully understand why this becomes necessary after patch
4. Isn't it already needed since the memslots are protected by RCU? Or
is there already a higher level rcu dereference?

__kvm_memslots already does an srcu dereference, so is there a path
where we aren't getting the slots from that function where this is
needed?

I wouldn't say that the rmaps are protected by RCU in any way that
separate from the memslots.

>
> Paolo
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/7] Lazily allocate memslot rmaps
  2021-05-04  7:21     ` Paolo Bonzini
@ 2021-05-04 17:28       ` Ben Gardon
  2021-05-04 18:17         ` Sean Christopherson
  0 siblings, 1 reply; 26+ messages in thread
From: Ben Gardon @ 2021-05-04 17:28 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: LKML, kvm, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On Tue, May 4, 2021 at 12:21 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 03/05/21 19:31, Ben Gardon wrote:
> > On Mon, May 3, 2021 at 6:45 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
> >>
> >> On 29/04/21 23:18, Ben Gardon wrote:
> >>> This series enables KVM to save memory when using the TDP MMU by waiting
> >>> to allocate memslot rmaps until they are needed. To do this, KVM tracks
> >>> whether or not a shadow root has been allocated. In order to get away
> >>> with not allocating the rmaps, KVM must also be sure to skip operations
> >>> which iterate over the rmaps. If the TDP MMU is in use and we have not
> >>> allocated a shadow root, these operations would essentially be op-ops
> >>> anyway. Skipping the rmap operations has a secondary benefit of avoiding
> >>> acquiring the MMU lock in write mode in many cases, substantially
> >>> reducing MMU lock contention.
> >>>
> >>> This series was tested on an Intel Skylake machine. With the TDP MMU off
> >>> and on, this introduced no new failures on kvm-unit-tests or KVM selftests.
> >>
> >> Thanks, I only reported some technicalities in the ordering of loads
> >> (which matter since the loads happen with SRCU protection only).  Apart
> >> from this, this looks fine!
> >
> > Awesome to hear, thank you for the reviews. Should I send a v3
> > addressing those comments, or did you already make those changes when
> > applying to your tree?
>
> No, I didn't (I wanted some oversight, and this is 5.14 stuff anyway).

Ah, okay I'll send out a v3 soon, discussion on the other patches settles.

>
> Paolo
>

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 0/7] Lazily allocate memslot rmaps
  2021-05-04 17:28       ` Ben Gardon
@ 2021-05-04 18:17         ` Sean Christopherson
  0 siblings, 0 replies; 26+ messages in thread
From: Sean Christopherson @ 2021-05-04 18:17 UTC (permalink / raw)
  To: Ben Gardon
  Cc: Paolo Bonzini, LKML, kvm, Peter Xu, Peter Shier, Junaid Shahid,
	Jim Mattson, Yulei Zhang, Wanpeng Li, Vitaly Kuznetsov,
	Xiao Guangrong

On Tue, May 04, 2021, Ben Gardon wrote:
> On Tue, May 4, 2021 at 12:21 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
> >
> > On 03/05/21 19:31, Ben Gardon wrote:
> > > On Mon, May 3, 2021 at 6:45 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
> > >>
> > >> On 29/04/21 23:18, Ben Gardon wrote:
> > >>> This series enables KVM to save memory when using the TDP MMU by waiting
> > >>> to allocate memslot rmaps until they are needed. To do this, KVM tracks
> > >>> whether or not a shadow root has been allocated. In order to get away
> > >>> with not allocating the rmaps, KVM must also be sure to skip operations
> > >>> which iterate over the rmaps. If the TDP MMU is in use and we have not
> > >>> allocated a shadow root, these operations would essentially be op-ops
> > >>> anyway. Skipping the rmap operations has a secondary benefit of avoiding
> > >>> acquiring the MMU lock in write mode in many cases, substantially
> > >>> reducing MMU lock contention.
> > >>>
> > >>> This series was tested on an Intel Skylake machine. With the TDP MMU off
> > >>> and on, this introduced no new failures on kvm-unit-tests or KVM selftests.
> > >>
> > >> Thanks, I only reported some technicalities in the ordering of loads
> > >> (which matter since the loads happen with SRCU protection only).  Apart
> > >> from this, this looks fine!
> > >
> > > Awesome to hear, thank you for the reviews. Should I send a v3
> > > addressing those comments, or did you already make those changes when
> > > applying to your tree?
> >
> > No, I didn't (I wanted some oversight, and this is 5.14 stuff anyway).
> 
> Ah, okay I'll send out a v3 soon, discussion on the other patches settles.

I'll look through v2 this afternoon, now that I've mostly dug myself out of
RDPID hell.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active
  2021-04-29 21:18 ` [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active Ben Gardon
  2021-05-03 13:42   ` Paolo Bonzini
@ 2021-05-04 19:55   ` Sean Christopherson
  2021-05-04 20:26     ` Paolo Bonzini
  1 sibling, 1 reply; 26+ messages in thread
From: Sean Christopherson @ 2021-05-04 19:55 UTC (permalink / raw)
  To: Ben Gardon
  Cc: linux-kernel, kvm, Paolo Bonzini, Peter Xu, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On Thu, Apr 29, 2021, Ben Gardon wrote:
> Add a field to each VM to track if the shadow / legacy MMU is actually
> in use. If the shadow MMU is not in use, then that knowledge opens the
> door to other optimizations which will be added in future patches.
> 
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/mmu/mmu.c          | 10 +++++++++-
>  arch/x86/kvm/mmu/mmu_internal.h |  2 ++
>  arch/x86/kvm/mmu/tdp_mmu.c      |  6 ++++--
>  arch/x86/kvm/mmu/tdp_mmu.h      |  4 ++--
>  5 files changed, 19 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ad22d4839bcc..3900dcf2439e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1122,6 +1122,8 @@ struct kvm_arch {
>  	 */
>  	spinlock_t tdp_mmu_pages_lock;
>  #endif /* CONFIG_X86_64 */
> +
> +	bool shadow_mmu_active;

I'm not a fan of the name, "shadow mmu" in KVM almost always means shadow paging
of some form, whereas this covers both shadow paging and legacy TDP support.

But, I think we we can avoid bikeshedding by simply eliminating this flag.  More
in later patches.

>  };
>  
>  struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 930ac8a7e7c9..3975272321d0 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3110,6 +3110,11 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>  	return ret;
>  }
>  
> +void activate_shadow_mmu(struct kvm *kvm)
> +{
> +	kvm->arch.shadow_mmu_active = true;
> +}
> +
>  static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
>  			       struct list_head *invalid_list)
>  {
> @@ -3280,6 +3285,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
>  		}
>  	}
>  
> +	activate_shadow_mmu(vcpu->kvm);
> +
>  	write_lock(&vcpu->kvm->mmu_lock);
>  	r = make_mmu_pages_available(vcpu);
>  	if (r < 0)
> @@ -5467,7 +5474,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
>  {
>  	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
>  
> -	kvm_mmu_init_tdp_mmu(kvm);
> +	if (!kvm_mmu_init_tdp_mmu(kvm))
> +		activate_shadow_mmu(kvm);

Doesn't come into play yet, but I would strongly prefer to open code setting the
necessary flag instead of relying on the helper to never fail.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps
  2021-04-29 21:18 ` [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps Ben Gardon
  2021-05-03 13:42   ` Paolo Bonzini
@ 2021-05-04 20:13   ` Sean Christopherson
  2021-05-04 20:19     ` Paolo Bonzini
  2021-05-04 20:22   ` Paolo Bonzini
  2 siblings, 1 reply; 26+ messages in thread
From: Sean Christopherson @ 2021-05-04 20:13 UTC (permalink / raw)
  To: Ben Gardon
  Cc: linux-kernel, kvm, Paolo Bonzini, Peter Xu, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On Thu, Apr 29, 2021, Ben Gardon wrote:
> If the TDP MMU is in use, wait to allocate the rmaps until the shadow
> MMU is actually used. (i.e. a nested VM is launched.) This saves memory
> equal to 0.2% of guest memory in cases where the TDP MMU is used and
> there are no nested guests involved.
> 
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 11 +++++++
>  arch/x86/kvm/mmu/mmu.c          | 21 +++++++++++--
>  arch/x86/kvm/mmu/mmu_internal.h |  2 +-
>  arch/x86/kvm/x86.c              | 54 ++++++++++++++++++++++++++++++---
>  4 files changed, 80 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3900dcf2439e..b8633ed00a6a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1124,6 +1124,15 @@ struct kvm_arch {
>  #endif /* CONFIG_X86_64 */
>  
>  	bool shadow_mmu_active;
> +
> +	/*
> +	 * If set, the rmap should be allocated for any newly created or
> +	 * modified memslots. If allocating rmaps lazily, this may be set
> +	 * before the rmaps are allocated for existing memslots, but
> +	 * shadow_mmu_active will not be set until after the rmaps are fully
> +	 * allocated.
> +	 */
> +	bool alloc_memslot_rmaps;

Maybe "need_rmaps" or "need_memslot_rmaps"?

>  };
>  
>  struct kvm_vm_stat {
> @@ -1855,4 +1864,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
>  
>  int kvm_cpu_dirty_log_size(void);
>  
> +int alloc_all_memslots_rmaps(struct kvm *kvm);
> +
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e252af46f205..b2a6585bd978 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3125,9 +3125,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
>  	return ret;
>  }
>  
> -void activate_shadow_mmu(struct kvm *kvm)
> +int activate_shadow_mmu(struct kvm *kvm)
>  {
> +	int r;
> +
> +	r = alloc_all_memslots_rmaps(kvm);
> +	if (r)
> +		return r;
> +
>  kvm->arch.shadow_mmu_active = true;

If shadow_mmu_active goes away, so does this helper.

> +
> +	return 0;
>  }
>  
>  static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
> @@ -3300,7 +3308,9 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
>  		}
>  	}
>  
> -	activate_shadow_mmu(vcpu->kvm);
> +	r = activate_shadow_mmu(vcpu->kvm);
> +	if (r)
> +		return r;
>  
>  	write_lock(&vcpu->kvm->mmu_lock);
>  	r = make_mmu_pages_available(vcpu);
> @@ -5491,7 +5501,12 @@ void kvm_mmu_init_vm(struct kvm *kvm)
>  	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
>  
>  	if (!kvm_mmu_init_tdp_mmu(kvm))
> -		activate_shadow_mmu(kvm);
> +		/*
> +		 * No memslots can have been allocated at this point.
> +		 * activate_shadow_mmu won't actually need to allocate
> +		 * rmaps, so it cannot fail.
> +		 */
> +		WARN_ON(activate_shadow_mmu(kvm));

This is where I really don't like calling the full flow.  VM init is already
special, I don't see any harm in open coding the setting of the flag.  This also
provides a good place to document that the smp_store/load business is unnecessary
since there can't be users.

>  	node->track_write = kvm_mmu_pte_write;
>  	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
> -static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
> +int alloc_memslots_rmaps(struct kvm *kvm, struct kvm_memslots *slots)
> +{
> +	struct kvm_memory_slot *slot;
> +	int r = 0;
> +
> +	kvm_for_each_memslot(slot, slots) {
> +		r = alloc_memslot_rmap(kvm, slot, slot->npages);
> +		if (r)
> +			break;
> +	}
> +	return r;
> +}

Just open code this in the caller, it's literally one line of code and the
indentation isn't bad.

> +
> +int alloc_all_memslots_rmaps(struct kvm *kvm)
> +{
> +	struct kvm_memslots *slots;
> +	int r = 0;
> +	int i;
> +
> +	mutex_lock(&kvm->slots_arch_lock);
> +	kvm->arch.alloc_memslot_rmaps = true;
> +
> +	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> +		slots = __kvm_memslots(kvm, i);
> +		r = alloc_memslots_rmaps(kvm, slots);
> +		if (r)

It'd be easier just to destroy the rmaps on failure and then do:

	if (kvm->arch.needs_memslots_rmaps)
		return;

	mutex_lock(&kvm->slots_arch_lock);

	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
		kvm_for_each_memslot(slot, __kvm_memslots(kvm, i)) {
			r = alloc_memslot_rmap(kvm, slot, slot->npages);
				break;
		}
	}

	if (!r)
		smp_store_release(kvm->arch.needs_memslots_rmaps, true);
	else
		kvm_free_rmaps(kvm);
	mutex_unlock(&kvm->slots_arch_lock);


and make alloc_memslot_rmap() a pure allocator (no checks on whether it should
actually do allocations), i.e. push the check to the memslot flow:

static int kvm_alloc_memslot_metadata(struct kvm *kvm,
				      struct kvm_memory_slot *slot,
				      unsigned long npages)
{
	int i;
	int r;

	/*
	 * Clear out the previous array pointers for the KVM_MR_MOVE case.  The
	 * old arrays will be freed by __kvm_set_memory_region() if installing
	 * the new memslot is successful.
	 */
	memset(&slot->arch, 0, sizeof(slot->arch));

	if (kvm->arch.needs_memslots_rmaps) {
		r = alloc_memslot_rmap(kvm, slot, npages);
		if (r)
			return r;
	}


With that, there's no need for the separate shadow_mmu_active flag, and you can
do s/activate_shadow_mmu/kvm_activate_rmaps or so.


> +			break;
> +	}
> +	mutex_unlock(&kvm->slots_arch_lock);
> +	return r;
> +}
> +
> +static int kvm_alloc_memslot_metadata(struct kvm *kvm,
> +				      struct kvm_memory_slot *slot,
>  				      unsigned long npages)
>  {
>  	int i;
> @@ -10881,7 +10927,7 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
>  	 */
>  	memset(&slot->arch, 0, sizeof(slot->arch));
>  
> -	r = alloc_memslot_rmap(slot, npages);
> +	r = alloc_memslot_rmap(kvm, slot, npages);
>  	if (r)
>  		return r;
>  
> @@ -10954,7 +11000,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>  				enum kvm_mr_change change)
>  {
>  	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
> -		return kvm_alloc_memslot_metadata(memslot,
> +		return kvm_alloc_memslot_metadata(kvm, memslot,
>  						  mem->memory_size >> PAGE_SHIFT);
>  	return 0;
>  }
> -- 
> 2.31.1.527.g47e6f16901-goog
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active
  2021-05-04 17:26     ` Ben Gardon
@ 2021-05-04 20:18       ` Paolo Bonzini
  0 siblings, 0 replies; 26+ messages in thread
From: Paolo Bonzini @ 2021-05-04 20:18 UTC (permalink / raw)
  To: Ben Gardon
  Cc: LKML, kvm, Peter Xu, Sean Christopherson, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On 04/05/21 19:26, Ben Gardon wrote:
> On Mon, May 3, 2021 at 6:42 AM Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>> On 29/04/21 23:18, Ben Gardon wrote:
>>> +void activate_shadow_mmu(struct kvm *kvm)
>>> +{
>>> +     kvm->arch.shadow_mmu_active = true;
>>> +}
>>> +
>>
>> I think there's no lock protecting both the write and the read side.
>> Therefore this should be an smp_store_release, and all checks in
>> patch 2 should be an smp_load_acquire.
> 
> That makes sense.
> 
>>
>> Also, the assignments to slot->arch.rmap in patch 4 (alloc_memslot_rmap)
>> should be an rcu_assign_pointer, while __gfn_to_rmap must be changed like so:
>>
>> +       struct kvm_rmap_head *head;
>> ...
>> -       return &slot->arch.rmap[level - PG_LEVEL_4K][idx];
>> +       head = srcu_dereference(slot->arch.rmap[level - PG_LEVEL_4K], &kvm->srcu,
>> +                                lockdep_is_held(&kvm->slots_arch_lock));
>> +       return &head[idx];
> 
> I'm not sure I fully understand why this becomes necessary after patch
> 4. Isn't it already needed since the memslots are protected by RCU? Or
> is there already a higher level rcu dereference?
> 
> __kvm_memslots already does an srcu dereference, so is there a path
> where we aren't getting the slots from that function where this is
> needed?

There are two point of views:

1) the easier one is just CONFIG_PROVE_RCU debugging: the rmaps need to 
be accessed under RCU because the memslots can disappear as soon as 
kvm->srcu is unlocked.

2) the harder one (though at this point I'm better at figuring out these 
ordering bugs than "traditional" mutex races) is what the happens before 
relation[1] looks like.  Consider what happens if the rmaps are 
allocated by *another thread* after the slots have been fetched.

thread 1		thread 2		thread 3
allocate memslots
rcu_assign_pointer
			slots = srcu_dereference
						allocate rmap
						rcu_assign_pointer
			head = slot->arch.rmap[]

Here, thread 3 is allocating the rmaps in the SRCU-protected 
kvm_memslots; those rmaps that didn't exist at the time thread 1 did the 
rcu_assign_pointer (which synchronizes with thread 2's srcu_dereference 
that retrieves slots), hence they were not covered by the release 
semantics of that rcu_assign_pointer and the "consume" semantics of the 
corresponding srcu_dereference.  Therefore, thread 2 needs another 
srcu_dereference when retrieving them.

Paolo

[1] https://lwn.net/Articles/844224/

> I wouldn't say that the rmaps are protected by RCU in any way that
> separate from the memslots.


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps
  2021-05-04 20:13   ` Sean Christopherson
@ 2021-05-04 20:19     ` Paolo Bonzini
  2021-05-04 20:34       ` Sean Christopherson
  0 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2021-05-04 20:19 UTC (permalink / raw)
  To: Sean Christopherson, Ben Gardon
  Cc: linux-kernel, kvm, Peter Xu, Peter Shier, Junaid Shahid,
	Jim Mattson, Yulei Zhang, Wanpeng Li, Vitaly Kuznetsov,
	Xiao Guangrong

On 04/05/21 22:13, Sean Christopherson wrote:
>> +	/*
>> +	 * If set, the rmap should be allocated for any newly created or
>> +	 * modified memslots. If allocating rmaps lazily, this may be set
>> +	 * before the rmaps are allocated for existing memslots, but
>> +	 * shadow_mmu_active will not be set until after the rmaps are fully
>> +	 * allocated.
>> +	 */
>> +	bool alloc_memslot_rmaps;
> Maybe "need_rmaps" or "need_memslot_rmaps"?
> 

Since we're bikeshedding I prefer "memslots_have_rmaps" or something not 
too distant from that.

Paolo


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps
  2021-04-29 21:18 ` [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps Ben Gardon
  2021-05-03 13:42   ` Paolo Bonzini
  2021-05-04 20:13   ` Sean Christopherson
@ 2021-05-04 20:22   ` Paolo Bonzini
  2 siblings, 0 replies; 26+ messages in thread
From: Paolo Bonzini @ 2021-05-04 20:22 UTC (permalink / raw)
  To: Ben Gardon, linux-kernel, kvm
  Cc: Peter Xu, Sean Christopherson, Peter Shier, Junaid Shahid,
	Jim Mattson, Yulei Zhang, Wanpeng Li, Vitaly Kuznetsov,
	Xiao Guangrong

On 29/04/21 23:18, Ben Gardon wrote:
> +	/*
> +	 * If set, the rmap should be allocated for any newly created or
> +	 * modified memslots. If allocating rmaps lazily, this may be set
> +	 * before the rmaps are allocated for existing memslots, but
> +	 * shadow_mmu_active will not be set until after the rmaps are fully
> +	 * allocated.
> +	 */
> +	bool alloc_memslot_rmaps;

Let's remove the whole sentence starting with "If allocating rmaps 
lazily".  The part about shadow_mmu_active should go there, while the 
rest is pointless as long as we just say that this flag will be accessed 
only under slots_arch_lock.

(Regarding shadow_mmu_active, I think I know what Sean will be 
suggesting because I had a similar thought and decided it introduced 
extra unnecessary complication... but maybe not, so let's see what he says).

Paolo


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active
  2021-05-04 19:55   ` Sean Christopherson
@ 2021-05-04 20:26     ` Paolo Bonzini
  2021-05-04 20:36       ` Sean Christopherson
  0 siblings, 1 reply; 26+ messages in thread
From: Paolo Bonzini @ 2021-05-04 20:26 UTC (permalink / raw)
  To: Sean Christopherson, Ben Gardon
  Cc: linux-kernel, kvm, Peter Xu, Peter Shier, Junaid Shahid,
	Jim Mattson, Yulei Zhang, Wanpeng Li, Vitaly Kuznetsov,
	Xiao Guangrong

On 04/05/21 21:55, Sean Christopherson wrote:
> But, I think we we can avoid bikeshedding by simply eliminating this flag.  More
> in later patches.

Are you thinking of checking slot->arch.rmap[0] directly?  That should 
work indeed.

>> -	kvm_mmu_init_tdp_mmu(kvm);
>> +	if (!kvm_mmu_init_tdp_mmu(kvm))
>> +		activate_shadow_mmu(kvm);
> Doesn't come into play yet, but I would strongly prefer to open code setting the
> necessary flag instead of relying on the helper to never fail.
> 

You mean

kvm->arch.shadow_mmu_active = !kvm_mmu_init_tdp_mmu(kvm);

(which would assign to alloc_memslot_rmaps instead if shadow_mmu_active 
is removed)?  That makes sense.

Paolo


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps
  2021-05-04 20:19     ` Paolo Bonzini
@ 2021-05-04 20:34       ` Sean Christopherson
  0 siblings, 0 replies; 26+ messages in thread
From: Sean Christopherson @ 2021-05-04 20:34 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Ben Gardon, linux-kernel, kvm, Peter Xu, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On Tue, May 04, 2021, Paolo Bonzini wrote:
> On 04/05/21 22:13, Sean Christopherson wrote:
> > > +	/*
> > > +	 * If set, the rmap should be allocated for any newly created or
> > > +	 * modified memslots. If allocating rmaps lazily, this may be set
> > > +	 * before the rmaps are allocated for existing memslots, but
> > > +	 * shadow_mmu_active will not be set until after the rmaps are fully
> > > +	 * allocated.
> > > +	 */
> > > +	bool alloc_memslot_rmaps;
> > Maybe "need_rmaps" or "need_memslot_rmaps"?
> > 
> 
> Since we're bikeshedding I prefer "memslots_have_rmaps" or something not too
> distant from that.

Works for me.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active
  2021-05-04 20:26     ` Paolo Bonzini
@ 2021-05-04 20:36       ` Sean Christopherson
  0 siblings, 0 replies; 26+ messages in thread
From: Sean Christopherson @ 2021-05-04 20:36 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Ben Gardon, linux-kernel, kvm, Peter Xu, Peter Shier,
	Junaid Shahid, Jim Mattson, Yulei Zhang, Wanpeng Li,
	Vitaly Kuznetsov, Xiao Guangrong

On Tue, May 04, 2021, Paolo Bonzini wrote:
> On 04/05/21 21:55, Sean Christopherson wrote:
> > But, I think we we can avoid bikeshedding by simply eliminating this flag.  More
> > in later patches.
> 
> Are you thinking of checking slot->arch.rmap[0] directly?  That should work
> indeed.
> 
> > > -	kvm_mmu_init_tdp_mmu(kvm);
> > > +	if (!kvm_mmu_init_tdp_mmu(kvm))
> > > +		activate_shadow_mmu(kvm);
> > Doesn't come into play yet, but I would strongly prefer to open code setting the
> > necessary flag instead of relying on the helper to never fail.
> > 
> 
> You mean
> 
> kvm->arch.shadow_mmu_active = !kvm_mmu_init_tdp_mmu(kvm);
> 
> (which would assign to alloc_memslot_rmaps instead if shadow_mmu_active is
> removed)?  That makes sense.

Ya, that or:

	if (kvm_mmu_init_tdp_mmu(kvm))
		kvm->arch.memslots_have_rmaps = true;

I don't have a preference between the two variants.

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, back to index

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-29 21:18 [PATCH v2 0/7] Lazily allocate memslot rmaps Ben Gardon
2021-04-29 21:18 ` [PATCH v2 1/7] KVM: x86/mmu: Track if shadow MMU active Ben Gardon
2021-05-03 13:42   ` Paolo Bonzini
2021-05-04 17:26     ` Ben Gardon
2021-05-04 20:18       ` Paolo Bonzini
2021-05-04 19:55   ` Sean Christopherson
2021-05-04 20:26     ` Paolo Bonzini
2021-05-04 20:36       ` Sean Christopherson
2021-04-29 21:18 ` [PATCH v2 2/7] KVM: x86/mmu: Skip rmap operations if shadow MMU inactive Ben Gardon
2021-04-29 21:18 ` [PATCH v2 3/7] KVM: x86/mmu: Deduplicate rmap freeing Ben Gardon
2021-04-29 21:18 ` [PATCH v2 4/7] KVM: x86/mmu: Factor out allocating memslot rmap Ben Gardon
2021-04-29 21:18 ` [PATCH v2 5/7] KVM: mmu: Refactor memslot copy Ben Gardon
2021-04-29 21:18 ` [PATCH v2 6/7] KVM: mmu: Add slots_arch_lock for memslot arch fields Ben Gardon
2021-05-03 13:29   ` Paolo Bonzini
2021-04-29 21:18 ` [PATCH v2 7/7] KVM: x86/mmu: Lazily allocate memslot rmaps Ben Gardon
2021-05-03 13:42   ` Paolo Bonzini
2021-05-03 17:29     ` Ben Gardon
2021-05-04 20:13   ` Sean Christopherson
2021-05-04 20:19     ` Paolo Bonzini
2021-05-04 20:34       ` Sean Christopherson
2021-05-04 20:22   ` Paolo Bonzini
2021-05-03 13:44 ` [PATCH v2 0/7] " Paolo Bonzini
2021-05-03 17:31   ` Ben Gardon
2021-05-04  7:21     ` Paolo Bonzini
2021-05-04 17:28       ` Ben Gardon
2021-05-04 18:17         ` Sean Christopherson

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git
	git clone --mirror https://lore.kernel.org/lkml/10 lkml/git/10.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git