linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Juergen Gross <jgross@suse.com>
To: kvm@vger.kernel.org, x86@kernel.org, linux-kernel@vger.kernel.org
Cc: maz@kernel.org, ehabkost@redhat.com,
	Juergen Gross <jgross@suse.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: [PATCH v2 3/6] x86/kvm: introduce per cpu vcpu masks
Date: Fri,  3 Sep 2021 15:08:04 +0200	[thread overview]
Message-ID: <20210903130808.30142-4-jgross@suse.com> (raw)
In-Reply-To: <20210903130808.30142-1-jgross@suse.com>

In order to support high vcpu numbers per guest, don't use on-stack
vcpu bitmasks. As none of the currently used bitmasks are used in
functions subject to recursion, it is fairly easy to replace them with
percpu bitmasks.

Take a local lock (local_lock()) while such a bitmask is being used in
order to avoid double usage in case we'd switch cpus.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- use local_lock() instead of preempt_disable() (Paolo Bonzini)
---
 arch/x86/include/asm/kvm_host.h | 10 ++++++++++
 arch/x86/kvm/hyperv.c           | 25 ++++++++++++++++++-------
 arch/x86/kvm/irq_comm.c         |  9 +++++++--
 arch/x86/kvm/x86.c              | 22 +++++++++++++++++++++-
 4 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3513edee8e22..a809a9e4fa5c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -15,6 +15,7 @@
 #include <linux/cpumask.h>
 #include <linux/irq_work.h>
 #include <linux/irq.h>
+#include <linux/local_lock.h>
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
@@ -1591,6 +1592,15 @@ extern bool kvm_has_bus_lock_exit;
 /* maximum vcpu-id */
 unsigned int kvm_max_vcpu_id(void);
 
+/* per cpu vcpu bitmasks, protected by kvm_pcpu_mask_lock */
+DECLARE_PER_CPU(local_lock_t, kvm_pcpu_mask_lock);
+extern unsigned long __percpu *kvm_pcpu_vcpu_mask;
+#define KVM_VCPU_MASK_SZ	\
+	(sizeof(*kvm_pcpu_vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS))
+extern u64 __percpu *kvm_hv_vp_bitmap;
+#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
+#define KVM_HV_VPMAP_SZ		(sizeof(u64) * KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
+
 extern u64 kvm_mce_cap_supported;
 
 /*
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 41d2a53c5dea..680743e43c5b 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -41,7 +41,7 @@
 /* "Hv#1" signature */
 #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
 
-#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
+u64 __percpu *kvm_hv_vp_bitmap;
 
 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
 				bool vcpu_kick);
@@ -1701,8 +1701,7 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
 	struct kvm_vcpu *vcpu;
 	int i, bank, sbank = 0;
 
-	memset(vp_bitmap, 0,
-	       KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
+	memset(vp_bitmap, 0, KVM_HV_VPMAP_SZ);
 	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
 			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
 		vp_bitmap[bank] = sparse_banks[sbank++];
@@ -1740,8 +1739,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
-	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
-	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+	u64 *vp_bitmap;
+	unsigned long *vcpu_bitmap;
 	unsigned long *vcpu_mask;
 	u64 valid_bank_mask;
 	u64 sparse_banks[64];
@@ -1821,6 +1820,10 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 
 	cpumask_clear(&hv_vcpu->tlb_flush);
 
+	local_lock(&kvm_pcpu_mask_lock);
+	vcpu_bitmap = this_cpu_ptr(kvm_pcpu_vcpu_mask);
+	vp_bitmap = this_cpu_ptr(kvm_hv_vp_bitmap);
+
 	vcpu_mask = all_cpus ? NULL :
 		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
 					vp_bitmap, vcpu_bitmap);
@@ -1832,6 +1835,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
 				    NULL, vcpu_mask, &hv_vcpu->tlb_flush);
 
+	local_unlock(&kvm_pcpu_mask_lock);
+
 ret_success:
 	/* We always do full TLB flush, set 'Reps completed' = 'Rep Count' */
 	return (u64)HV_STATUS_SUCCESS |
@@ -1862,8 +1867,8 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_send_ipi_ex send_ipi_ex;
 	struct hv_send_ipi send_ipi;
-	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
-	DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+	u64 *vp_bitmap;
+	unsigned long *vcpu_bitmap;
 	unsigned long *vcpu_mask;
 	unsigned long valid_bank_mask;
 	u64 sparse_banks[64];
@@ -1920,12 +1925,18 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
 		return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
+	local_lock(&kvm_pcpu_mask_lock);
+	vcpu_bitmap = this_cpu_ptr(kvm_pcpu_vcpu_mask);
+	vp_bitmap = this_cpu_ptr(kvm_hv_vp_bitmap);
+
 	vcpu_mask = all_cpus ? NULL :
 		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
 					vp_bitmap, vcpu_bitmap);
 
 	kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
 
+	local_unlock(&kvm_pcpu_mask_lock);
+
 ret_success:
 	return HV_STATUS_SUCCESS;
 }
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index d5b72a08e566..c331204de007 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -47,7 +47,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 {
 	int i, r = -1;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
-	unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+	unsigned long *dest_vcpu_bitmap;
 	unsigned int dest_vcpus = 0;
 
 	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
@@ -59,7 +59,10 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		irq->delivery_mode = APIC_DM_FIXED;
 	}
 
-	memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+	local_lock(&kvm_pcpu_mask_lock);
+	dest_vcpu_bitmap = this_cpu_ptr(kvm_pcpu_vcpu_mask);
+
+	memset(dest_vcpu_bitmap, 0, KVM_VCPU_MASK_SZ);
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!kvm_apic_present(vcpu))
@@ -93,6 +96,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		lowest = kvm_get_vcpu(kvm, idx);
 	}
 
+	local_unlock(&kvm_pcpu_mask_lock);
+
 	if (lowest)
 		r = kvm_apic_set_irq(lowest, irq, dest_map);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b6f38f0b617..fd19b72a5733 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -213,6 +213,10 @@ unsigned int kvm_max_vcpu_id(void)
 }
 EXPORT_SYMBOL_GPL(kvm_max_vcpu_id);
 
+DEFINE_PER_CPU(local_lock_t, kvm_pcpu_mask_lock) =
+	INIT_LOCAL_LOCK(kvm_pcpu_mask_lock);
+unsigned long __percpu *kvm_pcpu_vcpu_mask;
+
 /*
  * Restoring the host value for MSRs that are only consumed when running in
  * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU
@@ -11029,9 +11033,18 @@ int kvm_arch_hardware_setup(void *opaque)
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		rdmsrl(MSR_IA32_XSS, host_xss);
 
+	kvm_pcpu_vcpu_mask = __alloc_percpu(KVM_VCPU_MASK_SZ,
+					    sizeof(unsigned long));
+	kvm_hv_vp_bitmap = __alloc_percpu(KVM_HV_VPMAP_SZ, sizeof(u64));
+
+	if (!kvm_pcpu_vcpu_mask || !kvm_hv_vp_bitmap) {
+		r = -ENOMEM;
+		goto err;
+	}
+
 	r = ops->hardware_setup();
 	if (r != 0)
-		return r;
+		goto err;
 
 	memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
 	kvm_ops_static_call_update();
@@ -11059,11 +11072,18 @@ int kvm_arch_hardware_setup(void *opaque)
 
 	kvm_init_msr_list();
 	return 0;
+
+ err:
+	free_percpu(kvm_pcpu_vcpu_mask);
+	free_percpu(kvm_hv_vp_bitmap);
+	return r;
 }
 
 void kvm_arch_hardware_unsetup(void)
 {
 	static_call(kvm_x86_hardware_unsetup)();
+	free_percpu(kvm_pcpu_vcpu_mask);
+	free_percpu(kvm_hv_vp_bitmap);
 }
 
 int kvm_arch_check_processor_compat(void *opaque)
-- 
2.26.2


  parent reply	other threads:[~2021-09-03 13:08 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-03 13:08 [PATCH v2 0/6] x86/kvm: add boot parameters for max vcpu configs Juergen Gross
2021-09-03 13:08 ` [PATCH v2 1/6] x86/kvm: remove non-x86 stuff from arch/x86/kvm/ioapic.h Juergen Gross
2021-09-03 13:08 ` [PATCH v2 2/6] x86/kvm: add boot parameter for adding vcpu-id bits Juergen Gross
2021-09-03 13:43   ` Vitaly Kuznetsov
2021-09-03 13:53     ` Juergen Gross
2021-09-03 19:48   ` Eduardo Habkost
2021-09-06  4:46     ` Juergen Gross
2021-09-28 16:41   ` Paolo Bonzini
2021-09-03 13:08 ` Juergen Gross [this message]
2021-09-03 16:05   ` [PATCH v2 3/6] x86/kvm: introduce per cpu vcpu masks Eduardo Habkost
2021-09-06  4:34     ` Juergen Gross
2021-09-07 18:34   ` Eduardo Habkost
2021-09-08  8:41     ` Vitaly Kuznetsov
2021-09-03 13:08 ` [PATCH v2 4/6] kvm: use kvfree() in kvm_arch_free_vm() Juergen Gross
2021-09-28 16:48   ` Paolo Bonzini
2021-09-03 13:08 ` [PATCH v2 5/6] kvm: allocate vcpu pointer array separately Juergen Gross
2021-09-03 14:41   ` Marc Zyngier
2021-09-06  4:33     ` Juergen Gross
2021-09-06  9:46       ` Marc Zyngier
2021-09-09 20:28         ` Sean Christopherson
2021-09-03 13:08 ` [PATCH v2 6/6] x86/kvm: add boot parameter for setting max number of vcpus per guest Juergen Gross
2021-09-06  0:45   ` Yao Yuan
2021-09-06  4:47     ` Juergen Gross

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210903130808.30142-4-jgross@suse.com \
    --to=jgross@suse.com \
    --cc=bp@alien8.de \
    --cc=ehabkost@redhat.com \
    --cc=hpa@zytor.com \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).