kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Marc Zyngier <maz@kernel.org>,
	Huacai Chen <chenhuacai@kernel.org>,
	Aleksandar Markovic <aleksandar.qemu.devel@gmail.com>,
	Paul Mackerras <paulus@ozlabs.org>,
	Anup Patel <anup.patel@wdc.com>,
	Paul Walmsley <paul.walmsley@sifive.com>,
	Palmer Dabbelt <palmer@dabbelt.com>,
	Albert Ou <aou@eecs.berkeley.edu>,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	Janosch Frank <frankja@linux.ibm.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Cc: James Morse <james.morse@arm.com>,
	Alexandru Elisei <alexandru.elisei@arm.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	Atish Patra <atish.patra@wdc.com>,
	David Hildenbrand <david@redhat.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Claudio Imbrenda <imbrenda@linux.ibm.com>,
	Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	linux-arm-kernel@lists.infradead.org,
	kvmarm@lists.cs.columbia.edu, linux-mips@vger.kernel.org,
	kvm@vger.kernel.org, kvm-ppc@vger.kernel.org,
	kvm-riscv@lists.infradead.org, linux-riscv@lists.infradead.org,
	linux-kernel@vger.kernel.org, Ben Gardon <bgardon@google.com>,
	"Maciej S . Szmigiero" <maciej.szmigiero@oracle.com>
Subject: [PATCH v5.5 24/30] KVM: Use interval tree to do fast hva lookup in memslots
Date: Thu,  4 Nov 2021 00:25:25 +0000	[thread overview]
Message-ID: <20211104002531.1176691-25-seanjc@google.com> (raw)
In-Reply-To: <20211104002531.1176691-1-seanjc@google.com>

From: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>

The current memslots implementation only allows quick binary search by gfn,
quick lookup by hva is not possible - the implementation has to do a linear
scan of the whole memslots array, even though the operation being performed
might apply just to a single memslot.

This significantly hurts performance of per-hva operations with higher
memslot counts.

Since hva ranges can overlap between memslots an interval tree is needed
for tracking them.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
[sean: handle interval tree updates in kvm_replace_memslot()]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/arm64/kvm/Kconfig   |  1 +
 arch/mips/kvm/Kconfig    |  1 +
 arch/powerpc/kvm/Kconfig |  1 +
 arch/s390/kvm/Kconfig    |  1 +
 arch/x86/kvm/Kconfig     |  1 +
 include/linux/kvm_host.h |  3 ++
 virt/kvm/kvm_main.c      | 60 +++++++++++++++++++++++++++++-----------
 7 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index d7eec0b43744..42185dcc9596 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -38,6 +38,7 @@ menuconfig KVM
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_VCPU_RUN_PID_CHANGE
 	select SCHED_INFO
+	select INTERVAL_TREE
 	help
 	  Support hosting virtualized guest machines.
 
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index a77297480f56..91d197bee9c0 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -27,6 +27,7 @@ config KVM
 	select KVM_MMIO
 	select MMU_NOTIFIER
 	select SRCU
+	select INTERVAL_TREE
 	help
 	  Support for hosting Guest kernels.
 
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index ff581d70f20c..e4c24f524ba8 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
 	select KVM_VFIO
 	select IRQ_BYPASS_MANAGER
 	select HAVE_KVM_IRQ_BYPASS
+	select INTERVAL_TREE
 
 config KVM_BOOK3S_HANDLER
 	bool
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 67a8e770e369..2e84d3922f7c 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -33,6 +33,7 @@ config KVM
 	select HAVE_KVM_NO_POLL
 	select SRCU
 	select KVM_VFIO
+	select INTERVAL_TREE
 	help
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 619186138176..7618bef0a4a9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -43,6 +43,7 @@ config KVM
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select KVM_VFIO
 	select SRCU
+	select INTERVAL_TREE
 	select HAVE_KVM_PM_NOTIFIER if PM
 	help
 	  Support hosting fully virtualized guest machines using hardware
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 81003e3acd53..d0363e2ba098 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -30,6 +30,7 @@
 #include <linux/nospec.h>
 #include <linux/notifier.h>
 #include <linux/hashtable.h>
+#include <linux/interval_tree.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -427,6 +428,7 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
 
 struct kvm_memory_slot {
 	struct hlist_node id_node;
+	struct interval_tree_node hva_node;
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long *dirty_bitmap;
@@ -528,6 +530,7 @@ static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
  */
 struct kvm_memslots {
 	u64 generation;
+	struct rb_root_cached hva_tree;
 	/* The mapping table from slot id to the index in memslots[]. */
 	DECLARE_HASHTABLE(id_hash, 7);
 	atomic_t last_used_slot;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13c497abaab8..f2235c430e64 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -498,6 +498,12 @@ static void kvm_null_fn(void)
 }
 #define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
 
+/* Iterate over each memslot intersecting [start, last] (inclusive) range */
+#define kvm_for_each_memslot_in_hva_range(node, slots, start, last)	     \
+	for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
+	     node;							     \
+	     node = interval_tree_iter_next(node, start, last))	     \
+
 static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 						  const struct kvm_hva_range *range)
 {
@@ -507,6 +513,9 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 	struct kvm_memslots *slots;
 	int i, idx;
 
+	if (WARN_ON_ONCE(range->end <= range->start))
+		return 0;
+
 	/* A null handler is allowed if and only if on_lock() is provided. */
 	if (WARN_ON_ONCE(IS_KVM_NULL_FN(range->on_lock) &&
 			 IS_KVM_NULL_FN(range->handler)))
@@ -515,15 +524,17 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 	idx = srcu_read_lock(&kvm->srcu);
 
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		struct interval_tree_node *node;
+
 		slots = __kvm_memslots(kvm, i);
-		kvm_for_each_memslot(slot, slots) {
+		kvm_for_each_memslot_in_hva_range(node, slots,
+						  range->start, range->end - 1) {
 			unsigned long hva_start, hva_end;
 
+			slot = container_of(node, struct kvm_memory_slot, hva_node);
 			hva_start = max(range->start, slot->userspace_addr);
 			hva_end = min(range->end, slot->userspace_addr +
 						  (slot->npages << PAGE_SHIFT));
-			if (hva_start >= hva_end)
-				continue;
 
 			/*
 			 * To optimize for the likely case where the address
@@ -859,6 +870,7 @@ static struct kvm_memslots *kvm_alloc_memslots(void)
 	if (!slots)
 		return NULL;
 
+	slots->hva_tree = RB_ROOT_CACHED;
 	hash_init(slots->id_hash);
 
 	return slots;
@@ -1262,22 +1274,32 @@ static void kvm_replace_memslot(struct kvm_memslots *slots,
 				struct kvm_memory_slot *new)
 {
 	/*
-	 * Remove the old memslot from the hash list, copying the node data
-	 * would corrupt the list.
+	 * Remove the old memslot from the hash list and interval tree, copying
+	 * the node data would corrupt the structures.
 	 */
 	if (old) {
 		hash_del(&old->id_node);
+		interval_tree_remove(&old->hva_node, &slots->hva_tree);
 
 		if (!new)
 			return;
 	}
 
-	/* Copy the source *data*, not the pointer, to the destination. */
-	if (old)
+	/*
+	 * Copy the source *data*, not the pointer, to the destination.  If
+	 * @old is NULL, initialize @new's hva range.
+	 */
+	if (old) {
 		*new = *old;
+	} else if (new) {
+		new->hva_node.start = new->userspace_addr;
+		new->hva_node.last = new->userspace_addr +
+				     (new->npages << PAGE_SHIFT) - 1;
+	}
 
 	/* (Re)Add the new memslot. */
 	hash_add(slots->id_hash, &new->id_node, new->id);
+	interval_tree_insert(&new->hva_node, &slots->hva_tree);
 }
 
 static void kvm_shift_memslot(struct kvm_memslots *slots, int dst, int src)
@@ -1308,7 +1330,7 @@ static inline void kvm_memslot_delete(struct kvm_memslots *slots,
 		atomic_set(&slots->last_used_slot, 0);
 
 	/*
-	 * Remove the to-be-deleted memslot from the list _before_ shifting
+	 * Remove the to-be-deleted memslot from the list/tree _before_ shifting
 	 * the trailing memslots forward, its data will be overwritten.
 	 * Defer the (somewhat pointless) copying of the memslot until after
 	 * the last slot has been shifted to avoid overwriting said last slot.
@@ -1335,7 +1357,8 @@ static inline int kvm_memslot_insert_back(struct kvm_memslots *slots)
  * itself is not preserved in the array, i.e. not swapped at this time, only
  * its new index into the array is tracked.  Returns the changed memslot's
  * current index into the memslots array.
- * The memslot at the returned index will not be in @slots->id_hash by then.
+ * The memslot at the returned index will not be in @slots->hva_tree or
+ * @slots->id_hash by then.
  * @memslot is a detached struct with desired final data of the changed slot.
  */
 static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
@@ -1349,10 +1372,10 @@ static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
 		return -1;
 
 	/*
-	 * Delete the slot from the hash table before sorting the remaining
-	 * slots, the slot's data may be overwritten when copying slots as part
-	 * of the sorting proccess.  update_memslots() will unconditionally
-	 * rewrite the entire slot and re-add it to the hash table.
+	 * Delete the slot from the hash table and interval tree before sorting
+	 * the remaining slots, the slot's data may be overwritten when copying
+	 * slots as part of the sorting proccess.  update_memslots() will
+	 * unconditionally rewrite and re-add the entire slot.
 	 */
 	kvm_replace_memslot(slots, oldslot, NULL);
 
@@ -1378,10 +1401,12 @@ static inline int kvm_memslot_move_backward(struct kvm_memslots *slots,
  * is not preserved in the array, i.e. not swapped at this time, only its new
  * index into the array is tracked.  Returns the changed memslot's final index
  * into the memslots array.
- * The memslot at the returned index will not be in @slots->id_hash by then.
+ * The memslot at the returned index will not be in @slots->hva_tree or
+ * @slots->id_hash by then.
  * @memslot is a detached struct with desired final data of the new or
  * changed slot.
- * Assumes that the memslot at @start index is not in @slots->id_hash.
+ * Assumes that the memslot at @start index is not in @slots->hva_tree or
+ * @slots->id_hash.
  */
 static inline int kvm_memslot_move_forward(struct kvm_memslots *slots,
 					   struct kvm_memory_slot *memslot,
@@ -1574,9 +1599,12 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
 
 	memcpy(slots, old, kvm_memslots_size(old->used_slots));
 
+	slots->hva_tree = RB_ROOT_CACHED;
 	hash_init(slots->id_hash);
-	kvm_for_each_memslot(memslot, slots)
+	kvm_for_each_memslot(memslot, slots) {
+		interval_tree_insert(&memslot->hva_node, &slots->hva_tree);
 		hash_add(slots->id_hash, &memslot->id_node, memslot->id);
+	}
 
 	return slots;
 }
-- 
2.33.1.1089.g2158813163f-goog


  parent reply	other threads:[~2021-11-04  0:28 UTC|newest]

Thread overview: 66+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-04  0:25 [PATCH v5.5 00/30] KVM: Scalable memslots implementation Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 01/30] KVM: Ensure local memslot copies operate on up-to-date arch-specific data Sean Christopherson
2021-11-04 21:27   ` Ben Gardon
2021-11-04 22:41     ` Sean Christopherson
2021-11-09  0:37   ` Maciej S. Szmigiero
2021-11-09  1:17     ` Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 02/30] KVM: Disallow user memslot with size that exceeds "unsigned long" Sean Christopherson
2021-11-09  0:38   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 03/30] KVM: Require total number of memslot pages to fit in an unsigned long Sean Christopherson
2021-11-09  0:38   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 04/30] KVM: Open code kvm_delete_memslot() into its only caller Sean Christopherson
2021-11-09  0:38   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 05/30] KVM: Resync only arch fields when slots_arch_lock gets reacquired Sean Christopherson
2021-11-09  0:38   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 06/30] KVM: Use "new" memslot's address space ID instead of dedicated param Sean Christopherson
2021-11-09  0:39   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 07/30] KVM: Let/force architectures to deal with arch specific memslot data Sean Christopherson
2021-11-09  0:39   ` Maciej S. Szmigiero
2021-11-09  1:13     ` Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 08/30] KVM: arm64: Use "new" memslot instead of userspace memory region Sean Christopherson
2021-11-09  6:36   ` Reiji Watanabe
2021-11-04  0:25 ` [PATCH v5.5 09/30] KVM: MIPS: Drop pr_debug from memslot commit to avoid using "mem" Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 10/30] KVM: PPC: Avoid referencing userspace memory region in memslot updates Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 11/30] KVM: s390: Use "new" memslot instead of userspace memory region Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 12/30] KVM: x86: " Sean Christopherson
2021-11-09  0:40   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 13/30] KVM: RISC-V: " Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 14/30] KVM: Stop passing kvm_userspace_memory_region to arch memslot hooks Sean Christopherson
2021-11-09  0:40   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 15/30] KVM: Use prepare/commit hooks to handle generic memslot metadata updates Sean Christopherson
2021-11-09  0:40   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 16/30] KVM: x86: Don't assume old/new memslots are non-NULL at memslot commit Sean Christopherson
2021-11-09  0:40   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 17/30] KVM: s390: Skip gfn/size sanity checks on memslot DELETE or FLAGS_ONLY Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 18/30] KVM: Don't make a full copy of the old memslot in __kvm_set_memory_region() Sean Christopherson
2021-11-09  0:41   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 19/30] KVM: x86: Don't call kvm_mmu_change_mmu_pages() if the count hasn't changed Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 20/30] KVM: x86: Use nr_memslot_pages to avoid traversing the memslots array Sean Christopherson
2021-11-09  0:41   ` Maciej S. Szmigiero
2021-11-09  1:34     ` Sean Christopherson
2021-11-09 16:29       ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 21/30] KVM: Integrate gfn_to_memslot_approx() into search_memslots() Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 22/30] KVM: Move WARN on invalid memslot index to update_memslots() Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 23/30] KVM: Resolve memslot ID via a hash table instead of via a static array Sean Christopherson
2021-11-11 23:51   ` Maciej S. Szmigiero
2021-11-12  1:03     ` Sean Christopherson
2021-11-04  0:25 ` Sean Christopherson [this message]
2021-11-11 23:52   ` [PATCH v5.5 24/30] KVM: Use interval tree to do fast hva lookup in memslots Maciej S. Szmigiero
2021-11-12  1:05     ` Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 25/30] KVM: s390: Introduce kvm_s390_get_gfn_end() Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 26/30] KVM: Keep memslots in tree-based structures instead of array-based ones Sean Christopherson
2021-11-11 23:52   ` Maciej S. Szmigiero
2021-11-12  0:51     ` Sean Christopherson
2021-11-13 15:22       ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 27/30] KVM: Optimize gfn lookup in kvm_zap_gfn_range() Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 28/30] KVM: Optimize overlapping memslots check Sean Christopherson
2021-11-04  0:25 ` [PATCH v5.5 29/30] KVM: Wait 'til the bitter end to initialize the "new" memslot Sean Christopherson
2021-11-11 23:52   ` Maciej S. Szmigiero
2021-11-04  0:25 ` [PATCH v5.5 30/30] KVM: Dynamically allocate "new" memslots from the get-go Sean Christopherson
2021-11-11 23:53   ` Maciej S. Szmigiero
2021-11-12  1:32     ` Sean Christopherson
2021-11-09  0:43 ` [PATCH v5.5 00/30] KVM: Scalable memslots implementation Maciej S. Szmigiero
2021-11-09  1:21   ` Sean Christopherson
2021-11-11 23:53     ` Maciej S. Szmigiero
2021-11-23 14:42       ` Maciej S. Szmigiero
2021-11-26 12:33         ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211104002531.1176691-25-seanjc@google.com \
    --to=seanjc@google.com \
    --cc=aleksandar.qemu.devel@gmail.com \
    --cc=alexandru.elisei@arm.com \
    --cc=anup.patel@wdc.com \
    --cc=aou@eecs.berkeley.edu \
    --cc=atish.patra@wdc.com \
    --cc=bgardon@google.com \
    --cc=borntraeger@de.ibm.com \
    --cc=chenhuacai@kernel.org \
    --cc=cohuck@redhat.com \
    --cc=david@redhat.com \
    --cc=frankja@linux.ibm.com \
    --cc=imbrenda@linux.ibm.com \
    --cc=james.morse@arm.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=kvm-riscv@lists.infradead.org \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mips@vger.kernel.org \
    --cc=linux-riscv@lists.infradead.org \
    --cc=maciej.szmigiero@oracle.com \
    --cc=maz@kernel.org \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=paulus@ozlabs.org \
    --cc=pbonzini@redhat.com \
    --cc=suzuki.poulose@arm.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).